prism 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/src/prism.c
CHANGED
@@ -421,6 +421,63 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
|
|
421
421
|
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
|
422
422
|
#endif
|
423
423
|
|
424
|
+
/******************************************************************************/
|
425
|
+
/* Diagnostic-related functions */
|
426
|
+
/******************************************************************************/
|
427
|
+
|
428
|
+
// Append an error to the list of errors on the parser.
|
429
|
+
static inline void
|
430
|
+
pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
431
|
+
pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
|
432
|
+
}
|
433
|
+
|
434
|
+
// Append an error to the list of errors on the parser using the location of the
|
435
|
+
// current token.
|
436
|
+
static inline void
|
437
|
+
pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
438
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
|
439
|
+
}
|
440
|
+
|
441
|
+
// Append an error to the list of errors on the parser using the given location.
|
442
|
+
static inline void
|
443
|
+
pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) {
|
444
|
+
pm_parser_err(parser, location->start, location->end, diag_id);
|
445
|
+
}
|
446
|
+
|
447
|
+
// Append an error to the list of errors on the parser using the location of the
|
448
|
+
// given node.
|
449
|
+
static inline void
|
450
|
+
pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
|
451
|
+
pm_parser_err(parser, node->location.start, node->location.end, diag_id);
|
452
|
+
}
|
453
|
+
|
454
|
+
// Append an error to the list of errors on the parser using the location of the
|
455
|
+
// previous token.
|
456
|
+
static inline void
|
457
|
+
pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
458
|
+
pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
|
459
|
+
}
|
460
|
+
|
461
|
+
// Append an error to the list of errors on the parser using the location of the
|
462
|
+
// given token.
|
463
|
+
static inline void
|
464
|
+
pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
|
465
|
+
pm_parser_err(parser, token->start, token->end, diag_id);
|
466
|
+
}
|
467
|
+
|
468
|
+
// Append a warning to the list of warnings on the parser.
|
469
|
+
static inline void
|
470
|
+
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
471
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
472
|
+
}
|
473
|
+
|
474
|
+
// Append a warning to the list of warnings on the parser using the location of
|
475
|
+
// the given token.
|
476
|
+
static inline void
|
477
|
+
pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
|
478
|
+
pm_parser_warn(parser, token->start, token->end, diag_id);
|
479
|
+
}
|
480
|
+
|
424
481
|
/******************************************************************************/
|
425
482
|
/* Node-related functions */
|
426
483
|
/******************************************************************************/
|
@@ -437,6 +494,22 @@ pm_parser_constant_id_owned(pm_parser_t *parser, const uint8_t *start, size_t le
|
|
437
494
|
return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
|
438
495
|
}
|
439
496
|
|
497
|
+
// Retrieve the constant pool id for the given static literal C string.
|
498
|
+
static inline pm_constant_id_t
|
499
|
+
pm_parser_constant_id_static(pm_parser_t *parser, const char *start, size_t length) {
|
500
|
+
uint8_t *owned_copy;
|
501
|
+
if (length > 0) {
|
502
|
+
owned_copy = malloc(length);
|
503
|
+
memcpy(owned_copy, start, length);
|
504
|
+
} else {
|
505
|
+
owned_copy = malloc(1);
|
506
|
+
owned_copy[0] = '\0';
|
507
|
+
}
|
508
|
+
return pm_constant_pool_insert_owned(&parser->constant_pool, owned_copy, length);
|
509
|
+
// Does not work because the static literal cannot be serialized as an offset of source
|
510
|
+
// return pm_constant_pool_insert_shared(&parser->constant_pool, start, length);
|
511
|
+
}
|
512
|
+
|
440
513
|
// Retrieve the constant pool id for the given token.
|
441
514
|
static inline pm_constant_id_t
|
442
515
|
pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -582,12 +655,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
582
655
|
|
583
656
|
// If we didn't hit a case before this check, then at this point we need to
|
584
657
|
// add a syntax error.
|
585
|
-
|
586
|
-
&parser->error_list,
|
587
|
-
block->base.location.start,
|
588
|
-
block->base.location.end,
|
589
|
-
PM_ERR_ARGUMENT_UNEXPECTED_BLOCK
|
590
|
-
);
|
658
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
591
659
|
}
|
592
660
|
|
593
661
|
/******************************************************************************/
|
@@ -601,6 +669,7 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
|
|
601
669
|
scope->base.location.start = node->location.start;
|
602
670
|
scope->base.location.end = node->location.end;
|
603
671
|
|
672
|
+
scope->ast_node = node;
|
604
673
|
scope->parameters = NULL;
|
605
674
|
scope->body = NULL;
|
606
675
|
pm_constant_id_list_init(&scope->locals);
|
@@ -626,6 +695,11 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
|
|
626
695
|
scope->locals = cast->locals;
|
627
696
|
break;
|
628
697
|
}
|
698
|
+
case PM_FOR_NODE: {
|
699
|
+
pm_for_node_t *cast = (pm_for_node_t *)node;
|
700
|
+
scope->body = (pm_node_t *)cast->statements;
|
701
|
+
break;
|
702
|
+
}
|
629
703
|
case PM_LAMBDA_NODE: {
|
630
704
|
pm_lambda_node_t *cast = (pm_lambda_node_t *) node;
|
631
705
|
if (cast->parameters) scope->parameters = cast->parameters->parameters;
|
@@ -679,14 +753,14 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
|
|
679
753
|
unsigned long value = strtoul(digits, &endptr, 10);
|
680
754
|
|
681
755
|
if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
|
682
|
-
|
756
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
|
683
757
|
value = UINT32_MAX;
|
684
758
|
}
|
685
759
|
|
686
760
|
free(digits);
|
687
761
|
|
688
762
|
if (value > UINT32_MAX) {
|
689
|
-
|
763
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
|
690
764
|
value = UINT32_MAX;
|
691
765
|
}
|
692
766
|
|
@@ -907,7 +981,7 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
|
|
907
981
|
|
908
982
|
// If the element is not a static literal, then the array is not a static
|
909
983
|
// literal. Turn that flag off.
|
910
|
-
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
|
984
|
+
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
|
911
985
|
node->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
|
912
986
|
}
|
913
987
|
}
|
@@ -1051,8 +1125,10 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1051
1125
|
end = key->location.end;
|
1052
1126
|
}
|
1053
1127
|
|
1128
|
+
// If the key and value of this assoc node are both static literals, then
|
1129
|
+
// we can mark this node as a static literal.
|
1054
1130
|
pm_node_flags_t flags = 0;
|
1055
|
-
if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE)) {
|
1131
|
+
if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
|
1056
1132
|
flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
|
1057
1133
|
}
|
1058
1134
|
|
@@ -1341,7 +1417,8 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1341
1417
|
.opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
1342
1418
|
.arguments = NULL,
|
1343
1419
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
1344
|
-
.block = NULL
|
1420
|
+
.block = NULL,
|
1421
|
+
.name = 0
|
1345
1422
|
};
|
1346
1423
|
|
1347
1424
|
return node;
|
@@ -1369,7 +1446,7 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
|
|
1369
1446
|
node->closing_loc = arguments->closing_loc;
|
1370
1447
|
node->block = arguments->block;
|
1371
1448
|
|
1372
|
-
|
1449
|
+
node->name = pm_parser_constant_id_static(parser, "[]", 2);
|
1373
1450
|
return node;
|
1374
1451
|
}
|
1375
1452
|
|
@@ -1388,7 +1465,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1388
1465
|
pm_arguments_node_arguments_append(arguments, argument);
|
1389
1466
|
node->arguments = arguments;
|
1390
1467
|
|
1391
|
-
|
1468
|
+
node->name = pm_parser_constant_id_token(parser, operator);
|
1392
1469
|
return node;
|
1393
1470
|
}
|
1394
1471
|
|
@@ -1420,7 +1497,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1420
1497
|
node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
|
1421
1498
|
}
|
1422
1499
|
|
1423
|
-
|
1500
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1424
1501
|
return node;
|
1425
1502
|
}
|
1426
1503
|
|
@@ -1447,7 +1524,7 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
|
|
1447
1524
|
node->closing_loc = arguments->closing_loc;
|
1448
1525
|
node->block = arguments->block;
|
1449
1526
|
|
1450
|
-
|
1527
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1451
1528
|
return node;
|
1452
1529
|
}
|
1453
1530
|
|
@@ -1469,7 +1546,7 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
|
|
1469
1546
|
node->arguments = arguments->arguments;
|
1470
1547
|
node->closing_loc = arguments->closing_loc;
|
1471
1548
|
|
1472
|
-
|
1549
|
+
node->name = pm_parser_constant_id_static(parser, "!", 1);
|
1473
1550
|
return node;
|
1474
1551
|
}
|
1475
1552
|
|
@@ -1496,7 +1573,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
|
|
1496
1573
|
node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
|
1497
1574
|
}
|
1498
1575
|
|
1499
|
-
|
1576
|
+
node->name = pm_parser_constant_id_static(parser, "call", 4);
|
1500
1577
|
return node;
|
1501
1578
|
}
|
1502
1579
|
|
@@ -1511,7 +1588,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1511
1588
|
node->receiver = receiver;
|
1512
1589
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
|
1513
1590
|
|
1514
|
-
|
1591
|
+
node->name = pm_parser_constant_id_static(parser, name, strlen(name));
|
1515
1592
|
return node;
|
1516
1593
|
}
|
1517
1594
|
|
@@ -1524,7 +1601,7 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
|
1524
1601
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1525
1602
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
1526
1603
|
|
1527
|
-
|
1604
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1528
1605
|
return node;
|
1529
1606
|
}
|
1530
1607
|
|
@@ -1537,17 +1614,18 @@ pm_call_node_variable_call_p(pm_call_node_t *node) {
|
|
1537
1614
|
|
1538
1615
|
// Initialize the read name by reading the write name and chopping off the '='.
|
1539
1616
|
static void
|
1540
|
-
pm_call_write_read_name_init(
|
1541
|
-
|
1542
|
-
|
1617
|
+
pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
|
1618
|
+
pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
|
1619
|
+
if (write_constant->length >= 1) {
|
1620
|
+
size_t length = write_constant->length - 1;
|
1543
1621
|
|
1544
1622
|
void *memory = malloc(length);
|
1545
|
-
memcpy(memory,
|
1623
|
+
memcpy(memory, write_constant->start, length);
|
1546
1624
|
|
1547
|
-
|
1625
|
+
*read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
|
1548
1626
|
} else {
|
1549
1627
|
// We can get here if the message was missing because of a syntax error.
|
1550
|
-
|
1628
|
+
*read_name = pm_parser_constant_id_static(parser, "", 0);
|
1551
1629
|
}
|
1552
1630
|
}
|
1553
1631
|
|
@@ -1573,13 +1651,13 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
1573
1651
|
.opening_loc = target->opening_loc,
|
1574
1652
|
.arguments = target->arguments,
|
1575
1653
|
.closing_loc = target->closing_loc,
|
1576
|
-
.read_name =
|
1654
|
+
.read_name = 0,
|
1577
1655
|
.write_name = target->name,
|
1578
1656
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1579
1657
|
.value = value
|
1580
1658
|
};
|
1581
1659
|
|
1582
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1660
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1583
1661
|
|
1584
1662
|
// Here we're going to free the target, since it is no longer necessary.
|
1585
1663
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -1610,14 +1688,14 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
1610
1688
|
.opening_loc = target->opening_loc,
|
1611
1689
|
.arguments = target->arguments,
|
1612
1690
|
.closing_loc = target->closing_loc,
|
1613
|
-
.read_name =
|
1691
|
+
.read_name = 0,
|
1614
1692
|
.write_name = target->name,
|
1615
1693
|
.operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
1616
1694
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1617
1695
|
.value = value
|
1618
1696
|
};
|
1619
1697
|
|
1620
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1698
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1621
1699
|
|
1622
1700
|
// Here we're going to free the target, since it is no longer necessary.
|
1623
1701
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -1649,13 +1727,13 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
1649
1727
|
.opening_loc = target->opening_loc,
|
1650
1728
|
.arguments = target->arguments,
|
1651
1729
|
.closing_loc = target->closing_loc,
|
1652
|
-
.read_name =
|
1730
|
+
.read_name = 0,
|
1653
1731
|
.write_name = target->name,
|
1654
1732
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1655
1733
|
.value = value
|
1656
1734
|
};
|
1657
1735
|
|
1658
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1736
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1659
1737
|
|
1660
1738
|
// Here we're going to free the target, since it is no longer necessary.
|
1661
1739
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -3372,11 +3450,20 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
3372
3450
|
return node;
|
3373
3451
|
}
|
3374
3452
|
|
3453
|
+
static inline bool
|
3454
|
+
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
|
3455
|
+
return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
|
3456
|
+
}
|
3457
|
+
|
3375
3458
|
// Allocate and initialize a new LocalVariableTargetNode node.
|
3376
3459
|
static pm_local_variable_target_node_t *
|
3377
3460
|
pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
3378
3461
|
pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
|
3379
3462
|
|
3463
|
+
if (token_is_numbered_parameter(name->start, name->end)) {
|
3464
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
3465
|
+
}
|
3466
|
+
|
3380
3467
|
*node = (pm_local_variable_target_node_t) {
|
3381
3468
|
{
|
3382
3469
|
.type = PM_LOCAL_VARIABLE_TARGET_NODE,
|
@@ -3870,10 +3957,27 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con
|
|
3870
3957
|
static pm_range_node_t *
|
3871
3958
|
pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
|
3872
3959
|
pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
|
3960
|
+
pm_node_flags_t flags = 0;
|
3961
|
+
|
3962
|
+
// Indicate that this node is an exclusive range if the operator is `...`.
|
3963
|
+
if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
|
3964
|
+
flags |= PM_RANGE_FLAGS_EXCLUDE_END;
|
3965
|
+
}
|
3966
|
+
|
3967
|
+
// Indicate that this node is a static literal (i.e., can be compiled with
|
3968
|
+
// a putobject in CRuby) if the left and right are implicit nil, explicit
|
3969
|
+
// nil, or integers.
|
3970
|
+
if (
|
3971
|
+
(left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
|
3972
|
+
(right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
|
3973
|
+
) {
|
3974
|
+
flags |= PM_NODE_FLAG_STATIC_LITERAL;
|
3975
|
+
}
|
3873
3976
|
|
3874
3977
|
*node = (pm_range_node_t) {
|
3875
3978
|
{
|
3876
3979
|
.type = PM_RANGE_NODE,
|
3980
|
+
.flags = flags,
|
3877
3981
|
.location = {
|
3878
3982
|
.start = (left == NULL ? operator->start : left->location.start),
|
3879
3983
|
.end = (right == NULL ? operator->end : right->location.end)
|
@@ -3884,15 +3988,6 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
|
|
3884
3988
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
|
3885
3989
|
};
|
3886
3990
|
|
3887
|
-
switch (operator->type) {
|
3888
|
-
case PM_TOKEN_DOT_DOT_DOT:
|
3889
|
-
case PM_TOKEN_UDOT_DOT_DOT:
|
3890
|
-
node->base.flags |= PM_RANGE_FLAGS_EXCLUDE_END;
|
3891
|
-
break;
|
3892
|
-
default:
|
3893
|
-
break;
|
3894
|
-
}
|
3895
|
-
|
3896
3991
|
return node;
|
3897
3992
|
}
|
3898
3993
|
|
@@ -3906,9 +4001,10 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
3906
4001
|
return node;
|
3907
4002
|
}
|
3908
4003
|
|
3909
|
-
// Allocate a new RegularExpressionNode node
|
4004
|
+
// Allocate and initialize a new RegularExpressionNode node with the given
|
4005
|
+
// unescaped string.
|
3910
4006
|
static pm_regular_expression_node_t *
|
3911
|
-
|
4007
|
+
pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
|
3912
4008
|
pm_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_regular_expression_node_t);
|
3913
4009
|
|
3914
4010
|
*node = (pm_regular_expression_node_t) {
|
@@ -3923,12 +4019,18 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
|
|
3923
4019
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
3924
4020
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
3925
4021
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
3926
|
-
.unescaped =
|
4022
|
+
.unescaped = *unescaped
|
3927
4023
|
};
|
3928
4024
|
|
3929
4025
|
return node;
|
3930
4026
|
}
|
3931
4027
|
|
4028
|
+
// Allocate and initialize a new RegularExpressionNode node.
|
4029
|
+
static inline pm_regular_expression_node_t *
|
4030
|
+
pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4031
|
+
return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4032
|
+
}
|
4033
|
+
|
3932
4034
|
// Allocate a new RequiredDestructuredParameterNode node.
|
3933
4035
|
static pm_required_destructured_parameter_node_t *
|
3934
4036
|
pm_required_destructured_parameter_node_create(pm_parser_t *parser, const pm_token_t *opening) {
|
@@ -4274,9 +4376,9 @@ pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *ri
|
|
4274
4376
|
return node;
|
4275
4377
|
}
|
4276
4378
|
|
4277
|
-
// Allocate a new StringNode node.
|
4278
|
-
static pm_string_node_t *
|
4279
|
-
|
4379
|
+
// Allocate and initialize a new StringNode node with the given unescaped string.
|
4380
|
+
static inline pm_string_node_t *
|
4381
|
+
pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
|
4280
4382
|
pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
|
4281
4383
|
pm_node_flags_t flags = 0;
|
4282
4384
|
|
@@ -4296,12 +4398,27 @@ pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
4296
4398
|
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
4297
4399
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
4298
4400
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
4299
|
-
.unescaped =
|
4401
|
+
.unescaped = *string
|
4300
4402
|
};
|
4301
4403
|
|
4302
4404
|
return node;
|
4303
4405
|
}
|
4304
4406
|
|
4407
|
+
// Allocate a new StringNode node.
|
4408
|
+
static pm_string_node_t *
|
4409
|
+
pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4410
|
+
return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4411
|
+
}
|
4412
|
+
|
4413
|
+
// Allocate a new StringNode node and create it using the current string on the
|
4414
|
+
// parser.
|
4415
|
+
static pm_string_node_t *
|
4416
|
+
pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4417
|
+
pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
|
4418
|
+
parser->current_string = PM_EMPTY_STRING;
|
4419
|
+
return node;
|
4420
|
+
}
|
4421
|
+
|
4305
4422
|
// Allocate and initialize a new SuperNode node.
|
4306
4423
|
static pm_super_node_t *
|
4307
4424
|
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
|
@@ -4338,9 +4455,10 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
4338
4455
|
return node;
|
4339
4456
|
}
|
4340
4457
|
|
4341
|
-
// Allocate a new SymbolNode node
|
4458
|
+
// Allocate and initialize a new SymbolNode node with the given unescaped
|
4459
|
+
// string.
|
4342
4460
|
static pm_symbol_node_t *
|
4343
|
-
|
4461
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
4344
4462
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
4345
4463
|
|
4346
4464
|
*node = (pm_symbol_node_t) {
|
@@ -4355,12 +4473,26 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
4355
4473
|
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
4356
4474
|
.value_loc = PM_LOCATION_TOKEN_VALUE(value),
|
4357
4475
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
4358
|
-
.unescaped =
|
4476
|
+
.unescaped = *unescaped
|
4359
4477
|
};
|
4360
4478
|
|
4361
4479
|
return node;
|
4362
4480
|
}
|
4363
4481
|
|
4482
|
+
// Allocate and initialize a new SymbolNode node.
|
4483
|
+
static inline pm_symbol_node_t *
|
4484
|
+
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
4485
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_EMPTY_STRING);
|
4486
|
+
}
|
4487
|
+
|
4488
|
+
// Allocate and initialize a new SymbolNode node with the current string.
|
4489
|
+
static pm_symbol_node_t *
|
4490
|
+
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
4491
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
4492
|
+
parser->current_string = PM_EMPTY_STRING;
|
4493
|
+
return node;
|
4494
|
+
}
|
4495
|
+
|
4364
4496
|
// Allocate and initialize a new SymbolNode node from a label.
|
4365
4497
|
static pm_symbol_node_t *
|
4366
4498
|
pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4376,8 +4508,6 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4376
4508
|
|
4377
4509
|
assert((label.end - label.start) >= 0);
|
4378
4510
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
4379
|
-
|
4380
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
|
4381
4511
|
break;
|
4382
4512
|
}
|
4383
4513
|
case PM_TOKEN_MISSING: {
|
@@ -4710,9 +4840,10 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
4710
4840
|
return node;
|
4711
4841
|
}
|
4712
4842
|
|
4713
|
-
// Allocate and initialize a new XStringNode node
|
4843
|
+
// Allocate and initialize a new XStringNode node with the given unescaped
|
4844
|
+
// string.
|
4714
4845
|
static pm_x_string_node_t *
|
4715
|
-
|
4846
|
+
pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
|
4716
4847
|
pm_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_x_string_node_t);
|
4717
4848
|
|
4718
4849
|
*node = (pm_x_string_node_t) {
|
@@ -4726,12 +4857,18 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
|
|
4726
4857
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4727
4858
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
4728
4859
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
4729
|
-
.unescaped =
|
4860
|
+
.unescaped = *unescaped
|
4730
4861
|
};
|
4731
4862
|
|
4732
4863
|
return node;
|
4733
4864
|
}
|
4734
4865
|
|
4866
|
+
// Allocate and initialize a new XStringNode node.
|
4867
|
+
static inline pm_x_string_node_t *
|
4868
|
+
pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4869
|
+
return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4870
|
+
}
|
4871
|
+
|
4735
4872
|
// Allocate a new YieldNode node.
|
4736
4873
|
static pm_yield_node_t *
|
4737
4874
|
pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
|
@@ -4765,8 +4902,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
|
|
4765
4902
|
return node;
|
4766
4903
|
}
|
4767
4904
|
|
4768
|
-
|
4769
|
-
#undef PM_EMPTY_STRING
|
4770
4905
|
#undef PM_ALLOC_NODE
|
4771
4906
|
|
4772
4907
|
/******************************************************************************/
|
@@ -4783,7 +4918,8 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
4783
4918
|
.previous = parser->current_scope,
|
4784
4919
|
.closed = closed,
|
4785
4920
|
.explicit_params = false,
|
4786
|
-
.numbered_params = false
|
4921
|
+
.numbered_params = false,
|
4922
|
+
.transparent = false
|
4787
4923
|
};
|
4788
4924
|
|
4789
4925
|
pm_constant_id_list_init(&scope->locals);
|
@@ -4792,6 +4928,25 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
4792
4928
|
return true;
|
4793
4929
|
}
|
4794
4930
|
|
4931
|
+
// Allocate and initialize a new transparent scope. Push it onto the scope stack.
|
4932
|
+
static bool
|
4933
|
+
pm_parser_scope_push_transparent(pm_parser_t *parser) {
|
4934
|
+
pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
|
4935
|
+
if (scope == NULL) return false;
|
4936
|
+
|
4937
|
+
*scope = (pm_scope_t) {
|
4938
|
+
.previous = parser->current_scope,
|
4939
|
+
.closed = false,
|
4940
|
+
.explicit_params = false,
|
4941
|
+
.numbered_params = false,
|
4942
|
+
.transparent = true
|
4943
|
+
};
|
4944
|
+
|
4945
|
+
parser->current_scope = scope;
|
4946
|
+
|
4947
|
+
return true;
|
4948
|
+
}
|
4949
|
+
|
4795
4950
|
// Check if the current scope has a given local variable.
|
4796
4951
|
static int
|
4797
4952
|
pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
@@ -4800,7 +4955,8 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
|
4800
4955
|
int depth = 0;
|
4801
4956
|
|
4802
4957
|
while (scope != NULL) {
|
4803
|
-
if (
|
4958
|
+
if (!scope->transparent &&
|
4959
|
+
pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
|
4804
4960
|
if (scope->closed) break;
|
4805
4961
|
|
4806
4962
|
scope = scope->previous;
|
@@ -4813,8 +4969,12 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
|
4813
4969
|
// Add a constant id to the local table of the current scope.
|
4814
4970
|
static inline void
|
4815
4971
|
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
|
4816
|
-
|
4817
|
-
|
4972
|
+
pm_scope_t *scope = parser->current_scope;
|
4973
|
+
while (scope && scope->transparent) scope = scope->previous;
|
4974
|
+
|
4975
|
+
assert(scope != NULL);
|
4976
|
+
if (!pm_constant_id_list_includes(&scope->locals, constant_id)) {
|
4977
|
+
pm_constant_id_list_append(&scope->locals, constant_id);
|
4818
4978
|
}
|
4819
4979
|
}
|
4820
4980
|
|
@@ -4839,18 +4999,13 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
4839
4999
|
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
4840
5000
|
}
|
4841
5001
|
|
4842
|
-
static inline bool
|
4843
|
-
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
|
4844
|
-
return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
|
4845
|
-
}
|
4846
|
-
|
4847
5002
|
// Add a parameter name to the current scope and check whether the name of the
|
4848
5003
|
// parameter is unique or not.
|
4849
5004
|
static void
|
4850
|
-
pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
|
5005
|
+
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
4851
5006
|
// We want to check whether the parameter name is a numbered parameter or not.
|
4852
5007
|
if (token_is_numbered_parameter(name->start, name->end)) {
|
4853
|
-
|
5008
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
4854
5009
|
}
|
4855
5010
|
|
4856
5011
|
// We want to ignore any parameter name that starts with an underscore.
|
@@ -4861,7 +5016,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
|
|
4861
5016
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
4862
5017
|
|
4863
5018
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
4864
|
-
|
5019
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
4865
5020
|
}
|
4866
5021
|
}
|
4867
5022
|
|
@@ -5007,17 +5162,6 @@ peek(pm_parser_t *parser) {
|
|
5007
5162
|
return peek_at(parser, parser->current.end);
|
5008
5163
|
}
|
5009
5164
|
|
5010
|
-
// Get the next string of length len in the source starting from parser->current.end.
|
5011
|
-
// If the string extends beyond the end of the source, return the empty string ""
|
5012
|
-
static inline const uint8_t *
|
5013
|
-
peek_string(pm_parser_t *parser, size_t len) {
|
5014
|
-
if (parser->current.end + len <= parser->end) {
|
5015
|
-
return parser->current.end;
|
5016
|
-
} else {
|
5017
|
-
return (const uint8_t *) "";
|
5018
|
-
}
|
5019
|
-
}
|
5020
|
-
|
5021
5165
|
// If the character to be read matches the given value, then returns true and
|
5022
5166
|
// advanced the current pointer.
|
5023
5167
|
static inline bool
|
@@ -5069,66 +5213,17 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
5069
5213
|
return memchr(cursor, '\n', (size_t) length);
|
5070
5214
|
}
|
5071
5215
|
|
5072
|
-
// Find the start of the encoding comment. This is effectively an inlined
|
5073
|
-
// version of strnstr with some modifications.
|
5074
|
-
static inline const uint8_t *
|
5075
|
-
parser_lex_encoding_comment_start(pm_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
|
5076
|
-
assert(remaining >= 0);
|
5077
|
-
size_t length = (size_t) remaining;
|
5078
|
-
|
5079
|
-
size_t key_length = strlen("coding:");
|
5080
|
-
if (key_length > length) return NULL;
|
5081
|
-
|
5082
|
-
const uint8_t *cursor_limit = cursor + length - key_length + 1;
|
5083
|
-
while ((cursor = pm_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5084
|
-
if (memcmp(cursor, "coding", key_length - 1) == 0) {
|
5085
|
-
size_t whitespace_after_coding = pm_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
|
5086
|
-
size_t cur_pos = key_length + whitespace_after_coding;
|
5087
|
-
|
5088
|
-
if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
|
5089
|
-
return cursor + cur_pos;
|
5090
|
-
}
|
5091
|
-
}
|
5092
|
-
|
5093
|
-
cursor++;
|
5094
|
-
}
|
5095
|
-
|
5096
|
-
return NULL;
|
5097
|
-
}
|
5098
|
-
|
5099
5216
|
// Here we're going to check if this is a "magic" comment, and perform whatever
|
5100
5217
|
// actions are necessary for it here.
|
5101
5218
|
static void
|
5102
|
-
|
5103
|
-
|
5104
|
-
const uint8_t *end = parser->current.end;
|
5105
|
-
|
5106
|
-
// These are the patterns we're going to match to find the encoding comment.
|
5107
|
-
// This is definitely not complete or even really correct.
|
5108
|
-
const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
|
5109
|
-
|
5110
|
-
// If we didn't find anything that matched our patterns, then return. Note
|
5111
|
-
// that this does a _very_ poor job of actually finding the encoding, and
|
5112
|
-
// there is a lot of work to do here to better reflect actual magic comment
|
5113
|
-
// parsing from CRuby, but this at least gets us part of the way there.
|
5114
|
-
if (encoding_start == NULL) return;
|
5115
|
-
|
5116
|
-
// Skip any non-newline whitespace after the "coding:" or "coding=".
|
5117
|
-
encoding_start += pm_strspn_inline_whitespace(encoding_start, end - encoding_start);
|
5118
|
-
|
5119
|
-
// Now determine the end of the encoding string. This is either the end of
|
5120
|
-
// the line, the first whitespace character, or a punctuation mark.
|
5121
|
-
const uint8_t *encoding_end = pm_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
|
5122
|
-
encoding_end = encoding_end == NULL ? end : encoding_end;
|
5123
|
-
|
5124
|
-
// Finally, we can determine the width of the encoding string.
|
5125
|
-
size_t width = (size_t) (encoding_end - encoding_start);
|
5219
|
+
parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
5220
|
+
size_t width = (size_t) (end - start);
|
5126
5221
|
|
5127
5222
|
// First, we're going to call out to a user-defined callback if one was
|
5128
5223
|
// provided. If they return an encoding struct that we can use, then we'll
|
5129
5224
|
// use that here.
|
5130
5225
|
if (parser->encoding_decode_callback != NULL) {
|
5131
|
-
pm_encoding_t *encoding = parser->encoding_decode_callback(parser,
|
5226
|
+
pm_encoding_t *encoding = parser->encoding_decode_callback(parser, start, width);
|
5132
5227
|
|
5133
5228
|
if (encoding != NULL) {
|
5134
5229
|
parser->encoding = *encoding;
|
@@ -5140,7 +5235,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5140
5235
|
// Extensions like utf-8 can contain extra encoding details like,
|
5141
5236
|
// utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
|
5142
5237
|
// treat any encoding starting utf-8 as utf-8.
|
5143
|
-
if ((
|
5238
|
+
if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
|
5144
5239
|
// We don't need to do anything here because the default encoding is
|
5145
5240
|
// already UTF-8. We'll just return.
|
5146
5241
|
return;
|
@@ -5149,7 +5244,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5149
5244
|
// Next, we're going to loop through each of the encodings that we handle
|
5150
5245
|
// explicitly. If we found one that we understand, we'll use that value.
|
5151
5246
|
#define ENCODING(value, prebuilt) \
|
5152
|
-
if (width == sizeof(value) - 1 &&
|
5247
|
+
if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
|
5153
5248
|
parser->encoding = prebuilt; \
|
5154
5249
|
parser->encoding_changed |= true; \
|
5155
5250
|
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
|
@@ -5198,40 +5293,220 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5198
5293
|
// didn't understand the encoding that the user was trying to use. In this
|
5199
5294
|
// case we'll keep using the default encoding but add an error to the
|
5200
5295
|
// parser to indicate an unsuccessful parse.
|
5201
|
-
|
5296
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
|
5297
|
+
}
|
5298
|
+
|
5299
|
+
// Look for a specific pattern of "coding" and potentially set the encoding on
|
5300
|
+
// the parser.
|
5301
|
+
static void
|
5302
|
+
parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
5303
|
+
const uint8_t *cursor = parser->current.start + 1;
|
5304
|
+
const uint8_t *end = parser->current.end;
|
5305
|
+
|
5306
|
+
bool separator = false;
|
5307
|
+
while (true) {
|
5308
|
+
if (end - cursor <= 6) return;
|
5309
|
+
switch (cursor[6]) {
|
5310
|
+
case 'C': case 'c': cursor += 6; continue;
|
5311
|
+
case 'O': case 'o': cursor += 5; continue;
|
5312
|
+
case 'D': case 'd': cursor += 4; continue;
|
5313
|
+
case 'I': case 'i': cursor += 3; continue;
|
5314
|
+
case 'N': case 'n': cursor += 2; continue;
|
5315
|
+
case 'G': case 'g': cursor += 1; continue;
|
5316
|
+
case '=': case ':':
|
5317
|
+
separator = true;
|
5318
|
+
cursor += 6;
|
5319
|
+
break;
|
5320
|
+
default:
|
5321
|
+
cursor += 6;
|
5322
|
+
if (pm_char_is_whitespace(*cursor)) break;
|
5323
|
+
continue;
|
5324
|
+
}
|
5325
|
+
if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
|
5326
|
+
separator = false;
|
5327
|
+
}
|
5328
|
+
|
5329
|
+
while (true) {
|
5330
|
+
do {
|
5331
|
+
if (++cursor >= end) return;
|
5332
|
+
} while (pm_char_is_whitespace(*cursor));
|
5333
|
+
|
5334
|
+
if (separator) break;
|
5335
|
+
if (*cursor != '=' && *cursor != ':') return;
|
5336
|
+
|
5337
|
+
separator = true;
|
5338
|
+
cursor++;
|
5339
|
+
}
|
5340
|
+
|
5341
|
+
const uint8_t *value_start = cursor;
|
5342
|
+
while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end);
|
5343
|
+
|
5344
|
+
parser_lex_magic_comment_encoding_value(parser, value_start, cursor);
|
5202
5345
|
}
|
5203
5346
|
|
5204
5347
|
// Check if this is a magic comment that includes the frozen_string_literal
|
5205
5348
|
// pragma. If it does, set that field on the parser.
|
5206
5349
|
static void
|
5207
|
-
|
5208
|
-
const uint8_t *
|
5350
|
+
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
5351
|
+
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
5352
|
+
parser->frozen_string_literal = true;
|
5353
|
+
}
|
5354
|
+
}
|
5355
|
+
|
5356
|
+
static inline bool
|
5357
|
+
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
|
5358
|
+
return b == '\'' || b == '"' || b == ':' || b == ';';
|
5359
|
+
}
|
5360
|
+
|
5361
|
+
// Find an emacs magic comment marker (-*-) within the given bounds. If one is
|
5362
|
+
// found, it returns a pointer to the start of the marker. Otherwise it returns
|
5363
|
+
// NULL.
|
5364
|
+
static inline const uint8_t *
|
5365
|
+
parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
|
5366
|
+
while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5367
|
+
if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
|
5368
|
+
return cursor;
|
5369
|
+
}
|
5370
|
+
cursor++;
|
5371
|
+
}
|
5372
|
+
return NULL;
|
5373
|
+
}
|
5374
|
+
|
5375
|
+
// Parse the current token on the parser to see if it's a magic comment and
|
5376
|
+
// potentially perform some action based on that. A regular expression that this
|
5377
|
+
// function is effectively matching is:
|
5378
|
+
//
|
5379
|
+
// %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*"
|
5380
|
+
//
|
5381
|
+
// It returns true if it consumes the entire comment. Otherwise it returns
|
5382
|
+
// false.
|
5383
|
+
static inline bool
|
5384
|
+
parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
5385
|
+
const uint8_t *start = parser->current.start + 1;
|
5209
5386
|
const uint8_t *end = parser->current.end;
|
5387
|
+
if (end - start <= 7) return false;
|
5388
|
+
|
5389
|
+
const uint8_t *cursor;
|
5390
|
+
bool indicator = false;
|
5210
5391
|
|
5211
|
-
|
5212
|
-
|
5392
|
+
if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
|
5393
|
+
start = cursor + 3;
|
5213
5394
|
|
5214
|
-
|
5395
|
+
if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
|
5396
|
+
end = cursor;
|
5397
|
+
indicator = true;
|
5398
|
+
} else {
|
5399
|
+
// If we have a start marker but not an end marker, then we cannot
|
5400
|
+
// have a magic comment.
|
5401
|
+
return false;
|
5402
|
+
}
|
5403
|
+
}
|
5215
5404
|
|
5216
|
-
|
5217
|
-
|
5218
|
-
|
5219
|
-
cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
|
5405
|
+
cursor = start;
|
5406
|
+
while (cursor < end) {
|
5407
|
+
while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
|
5220
5408
|
|
5221
|
-
|
5222
|
-
|
5223
|
-
cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
|
5409
|
+
const uint8_t *key_start = cursor;
|
5410
|
+
while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
|
5224
5411
|
|
5225
|
-
|
5226
|
-
|
5227
|
-
|
5412
|
+
const uint8_t *key_end = cursor;
|
5413
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5414
|
+
if (cursor == end) break;
|
5228
5415
|
|
5229
|
-
|
5416
|
+
if (*cursor == ':') {
|
5417
|
+
cursor++;
|
5418
|
+
} else {
|
5419
|
+
if (!indicator) return false;
|
5420
|
+
continue;
|
5421
|
+
}
|
5422
|
+
|
5423
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5424
|
+
if (cursor == end) break;
|
5425
|
+
|
5426
|
+
const uint8_t *value_start;
|
5427
|
+
const uint8_t *value_end;
|
5428
|
+
|
5429
|
+
if (*cursor == '"') {
|
5430
|
+
value_start = ++cursor;
|
5431
|
+
for (; cursor < end && *cursor != '"'; cursor++) {
|
5432
|
+
if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
|
5230
5433
|
}
|
5434
|
+
value_end = cursor;
|
5435
|
+
} else {
|
5436
|
+
value_start = cursor;
|
5437
|
+
while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
|
5438
|
+
value_end = cursor;
|
5231
5439
|
}
|
5232
5440
|
|
5233
|
-
|
5441
|
+
if (indicator) {
|
5442
|
+
while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
|
5443
|
+
} else {
|
5444
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5445
|
+
if (cursor != end) return false;
|
5446
|
+
}
|
5447
|
+
|
5448
|
+
// Here, we need to do some processing on the key to swap out dashes for
|
5449
|
+
// underscores. We only need to do this if there _is_ a dash in the key.
|
5450
|
+
pm_string_t key;
|
5451
|
+
const size_t key_length = (size_t) (key_end - key_start);
|
5452
|
+
const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, &parser->encoding);
|
5453
|
+
|
5454
|
+
if (dash == NULL) {
|
5455
|
+
pm_string_shared_init(&key, key_start, key_end);
|
5456
|
+
} else {
|
5457
|
+
size_t width = (size_t) (key_end - key_start);
|
5458
|
+
uint8_t *buffer = malloc(width);
|
5459
|
+
if (buffer == NULL) break;
|
5460
|
+
|
5461
|
+
memcpy(buffer, key_start, width);
|
5462
|
+
buffer[dash - key_start] = '_';
|
5463
|
+
|
5464
|
+
while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5465
|
+
buffer[dash - key_start] = '_';
|
5466
|
+
}
|
5467
|
+
|
5468
|
+
pm_string_owned_init(&key, buffer, width);
|
5469
|
+
}
|
5470
|
+
|
5471
|
+
// Finally, we can start checking the key against the list of known
|
5472
|
+
// magic comment keys, and potentially change state based on that.
|
5473
|
+
const uint8_t *key_source = pm_string_source(&key);
|
5474
|
+
|
5475
|
+
// We only want to attempt to compare against encoding comments if it's
|
5476
|
+
// the first line in the file (or the second in the case of a shebang).
|
5477
|
+
if (parser->current.start == parser->encoding_comment_start) {
|
5478
|
+
if (
|
5479
|
+
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
|
5480
|
+
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
|
5481
|
+
) {
|
5482
|
+
parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
|
5483
|
+
}
|
5484
|
+
}
|
5485
|
+
|
5486
|
+
// We only want to handle frozen string literal comments if it's before
|
5487
|
+
// any semantic tokens have been seen.
|
5488
|
+
if (!semantic_token_seen) {
|
5489
|
+
if (key_length == 21 && pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
|
5490
|
+
parser_lex_magic_comment_frozen_string_literal_value(parser, value_start, value_end);
|
5491
|
+
}
|
5492
|
+
}
|
5493
|
+
|
5494
|
+
// When we're done, we want to free the string in case we had to
|
5495
|
+
// allocate memory for it.
|
5496
|
+
pm_string_free(&key);
|
5497
|
+
|
5498
|
+
// Allocate a new magic comment node to append to the parser's list.
|
5499
|
+
pm_magic_comment_t *magic_comment;
|
5500
|
+
if ((magic_comment = (pm_magic_comment_t *) calloc(sizeof(pm_magic_comment_t), 1)) != NULL) {
|
5501
|
+
magic_comment->key_start = key_start;
|
5502
|
+
magic_comment->value_start = value_start;
|
5503
|
+
magic_comment->key_length = (uint32_t) key_length;
|
5504
|
+
magic_comment->value_length = (uint32_t) (value_end - value_start);
|
5505
|
+
pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
|
5506
|
+
}
|
5234
5507
|
}
|
5508
|
+
|
5509
|
+
return true;
|
5235
5510
|
}
|
5236
5511
|
|
5237
5512
|
/******************************************************************************/
|
@@ -5366,7 +5641,7 @@ context_def_p(pm_parser_t *parser) {
|
|
5366
5641
|
static void
|
5367
5642
|
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
5368
5643
|
if (invalid != NULL) {
|
5369
|
-
|
5644
|
+
pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
|
5370
5645
|
}
|
5371
5646
|
}
|
5372
5647
|
|
@@ -5430,7 +5705,7 @@ lex_optional_float_suffix(pm_parser_t *parser) {
|
|
5430
5705
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
5431
5706
|
type = PM_TOKEN_FLOAT;
|
5432
5707
|
} else {
|
5433
|
-
|
5708
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
|
5434
5709
|
type = PM_TOKEN_FLOAT;
|
5435
5710
|
}
|
5436
5711
|
}
|
@@ -5451,7 +5726,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5451
5726
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
5452
5727
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
5453
5728
|
} else {
|
5454
|
-
|
5729
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
5455
5730
|
}
|
5456
5731
|
|
5457
5732
|
break;
|
@@ -5463,7 +5738,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5463
5738
|
if (pm_char_is_binary_digit(peek(parser))) {
|
5464
5739
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
5465
5740
|
} else {
|
5466
|
-
|
5741
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
5467
5742
|
}
|
5468
5743
|
|
5469
5744
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
|
@@ -5476,7 +5751,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5476
5751
|
if (pm_char_is_octal_digit(peek(parser))) {
|
5477
5752
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
5478
5753
|
} else {
|
5479
|
-
|
5754
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
5480
5755
|
}
|
5481
5756
|
|
5482
5757
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
|
@@ -5503,7 +5778,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5503
5778
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
5504
5779
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
5505
5780
|
} else {
|
5506
|
-
|
5781
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
5507
5782
|
}
|
5508
5783
|
|
5509
5784
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
|
@@ -5581,7 +5856,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
5581
5856
|
static pm_token_type_t
|
5582
5857
|
lex_global_variable(pm_parser_t *parser) {
|
5583
5858
|
if (parser->current.end >= parser->end) {
|
5584
|
-
|
5859
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5585
5860
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
5586
5861
|
}
|
5587
5862
|
|
@@ -5622,7 +5897,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
5622
5897
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
5623
5898
|
|
5624
5899
|
// $0 isn't allowed to be followed by anything.
|
5625
|
-
|
5900
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5626
5901
|
}
|
5627
5902
|
|
5628
5903
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -5653,7 +5928,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
5653
5928
|
} else {
|
5654
5929
|
// If we get here, then we have a $ followed by something that isn't
|
5655
5930
|
// recognized as a global variable.
|
5656
|
-
|
5931
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5657
5932
|
}
|
5658
5933
|
|
5659
5934
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -5962,52 +6237,475 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
5962
6237
|
}
|
5963
6238
|
}
|
5964
6239
|
|
5965
|
-
|
5966
|
-
|
5967
|
-
|
5968
|
-
|
5969
|
-
|
5970
|
-
// \t horizontal tab, ASCII 09h (TAB)
|
5971
|
-
// \n newline (line feed), ASCII 0Ah (LF)
|
5972
|
-
// \v vertical tab, ASCII 0Bh (VT)
|
5973
|
-
// \f form feed, ASCII 0Ch (FF)
|
5974
|
-
// \r carriage return, ASCII 0Dh (CR)
|
5975
|
-
// \e escape, ASCII 1Bh (ESC)
|
5976
|
-
// \s space, ASCII 20h (SPC)
|
5977
|
-
// \\ backslash
|
5978
|
-
// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7])
|
5979
|
-
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
|
5980
|
-
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
5981
|
-
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
|
5982
|
-
// \cx or \C-x control character, where x is an ASCII printable character
|
5983
|
-
// \M-x meta character, where x is an ASCII printable character
|
5984
|
-
// \M-\C-x meta control character, where x is an ASCII printable character
|
5985
|
-
// \M-\cx same as above
|
5986
|
-
// \c\M-x same as above
|
5987
|
-
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
5988
|
-
//
|
5989
|
-
static pm_token_type_t
|
5990
|
-
lex_question_mark(pm_parser_t *parser) {
|
5991
|
-
if (lex_state_end_p(parser)) {
|
5992
|
-
lex_state_set(parser, PM_LEX_STATE_BEG);
|
5993
|
-
return PM_TOKEN_QUESTION_MARK;
|
5994
|
-
}
|
6240
|
+
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
|
6241
|
+
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
|
6242
|
+
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
|
6243
|
+
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
|
6244
|
+
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
|
5995
6245
|
|
5996
|
-
|
5997
|
-
|
5998
|
-
|
5999
|
-
|
6246
|
+
// This is a lookup table for whether or not an ASCII character is printable.
|
6247
|
+
static const bool ascii_printable_chars[] = {
|
6248
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
|
6249
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
6250
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6251
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6252
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6253
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
|
6254
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6255
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
|
6256
|
+
};
|
6000
6257
|
|
6001
|
-
|
6002
|
-
|
6003
|
-
|
6004
|
-
|
6258
|
+
static inline bool
|
6259
|
+
char_is_ascii_printable(const uint8_t b) {
|
6260
|
+
return (b < 0x80) && ascii_printable_chars[b];
|
6261
|
+
}
|
6262
|
+
|
6263
|
+
// Return the value that a hexadecimal digit character represents. For example,
|
6264
|
+
// transform 'a' into 10, 'b' into 11, etc.
|
6265
|
+
static inline uint8_t
|
6266
|
+
escape_hexadecimal_digit(const uint8_t value) {
|
6267
|
+
return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
|
6268
|
+
}
|
6269
|
+
|
6270
|
+
// Scan the 4 digits of a Unicode escape into the value. Returns the number of
|
6271
|
+
// digits scanned. This function assumes that the characters have already been
|
6272
|
+
// validated.
|
6273
|
+
static inline uint32_t
|
6274
|
+
escape_unicode(const uint8_t *string, size_t length) {
|
6275
|
+
uint32_t value = 0;
|
6276
|
+
for (size_t index = 0; index < length; index++) {
|
6277
|
+
if (index != 0) value <<= 4;
|
6278
|
+
value |= escape_hexadecimal_digit(string[index]);
|
6279
|
+
}
|
6280
|
+
return value;
|
6281
|
+
}
|
6282
|
+
|
6283
|
+
// Escape a single character value based on the given flags.
|
6284
|
+
static inline uint8_t
|
6285
|
+
escape_byte(uint8_t value, const uint8_t flags) {
|
6286
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
|
6287
|
+
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
6288
|
+
return value;
|
6289
|
+
}
|
6290
|
+
|
6291
|
+
// Write a unicode codepoint to the given buffer.
|
6292
|
+
static inline void
|
6293
|
+
escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
|
6294
|
+
if (value <= 0x7F) { // 0xxxxxxx
|
6295
|
+
pm_buffer_append_u8(buffer, (uint8_t) value);
|
6296
|
+
} else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
|
6297
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xC0 | (value >> 6)));
|
6298
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6299
|
+
} else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
|
6300
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xE0 | (value >> 12)));
|
6301
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
6302
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6303
|
+
} else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
6304
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xF0 | (value >> 18)));
|
6305
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
|
6306
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
6307
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6308
|
+
} else {
|
6309
|
+
pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6310
|
+
pm_buffer_append_u8(buffer, 0xEF);
|
6311
|
+
pm_buffer_append_u8(buffer, 0xBF);
|
6312
|
+
pm_buffer_append_u8(buffer, 0xBD);
|
6313
|
+
}
|
6314
|
+
}
|
6315
|
+
|
6316
|
+
// The regular expression engine doesn't support the same escape sequences as
|
6317
|
+
// Ruby does. So first we have to read the escape sequence, and then we have to
|
6318
|
+
// format it like the regular expression engine expects it. For example, in Ruby
|
6319
|
+
// if we have:
|
6320
|
+
//
|
6321
|
+
// /\M-\C-?/
|
6322
|
+
//
|
6323
|
+
// then the first byte is actually 255, so we have to rewrite this as:
|
6324
|
+
//
|
6325
|
+
// /\xFF/
|
6326
|
+
//
|
6327
|
+
// Note that in this case there is a literal \ byte in the regular expression
|
6328
|
+
// source so that the regular expression engine will perform its own unescaping.
|
6329
|
+
static inline void
|
6330
|
+
escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
|
6331
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6332
|
+
pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
|
6333
|
+
|
6334
|
+
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
6335
|
+
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
6336
|
+
|
6337
|
+
if (byte1 >= 0xA) {
|
6338
|
+
pm_buffer_append_u8(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
6339
|
+
} else {
|
6340
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte1 + '0'));
|
6341
|
+
}
|
6342
|
+
|
6343
|
+
if (byte2 >= 0xA) {
|
6344
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
6345
|
+
} else {
|
6346
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte2 + '0'));
|
6347
|
+
}
|
6348
|
+
} else {
|
6349
|
+
pm_buffer_append_u8(buffer, byte);
|
6350
|
+
}
|
6351
|
+
}
|
6352
|
+
|
6353
|
+
// Read the value of an escape into the buffer.
|
6354
|
+
static void
|
6355
|
+
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
6356
|
+
switch (peek(parser)) {
|
6357
|
+
case '\\': {
|
6358
|
+
parser->current.end++;
|
6359
|
+
pm_buffer_append_u8(buffer, '\\');
|
6360
|
+
return;
|
6361
|
+
}
|
6362
|
+
case '\'': {
|
6363
|
+
parser->current.end++;
|
6364
|
+
pm_buffer_append_u8(buffer, '\'');
|
6365
|
+
return;
|
6366
|
+
}
|
6367
|
+
case 'a': {
|
6368
|
+
parser->current.end++;
|
6369
|
+
pm_buffer_append_u8(buffer, '\a');
|
6370
|
+
return;
|
6371
|
+
}
|
6372
|
+
case 'b': {
|
6373
|
+
parser->current.end++;
|
6374
|
+
pm_buffer_append_u8(buffer, '\b');
|
6375
|
+
return;
|
6376
|
+
}
|
6377
|
+
case 'e': {
|
6378
|
+
parser->current.end++;
|
6379
|
+
pm_buffer_append_u8(buffer, '\033');
|
6380
|
+
return;
|
6381
|
+
}
|
6382
|
+
case 'f': {
|
6383
|
+
parser->current.end++;
|
6384
|
+
pm_buffer_append_u8(buffer, '\f');
|
6385
|
+
return;
|
6386
|
+
}
|
6387
|
+
case 'n': {
|
6388
|
+
parser->current.end++;
|
6389
|
+
pm_buffer_append_u8(buffer, '\n');
|
6390
|
+
return;
|
6391
|
+
}
|
6392
|
+
case 'r': {
|
6393
|
+
parser->current.end++;
|
6394
|
+
pm_buffer_append_u8(buffer, '\r');
|
6395
|
+
return;
|
6396
|
+
}
|
6397
|
+
case 's': {
|
6398
|
+
parser->current.end++;
|
6399
|
+
pm_buffer_append_u8(buffer, ' ');
|
6400
|
+
return;
|
6401
|
+
}
|
6402
|
+
case 't': {
|
6403
|
+
parser->current.end++;
|
6404
|
+
pm_buffer_append_u8(buffer, '\t');
|
6405
|
+
return;
|
6406
|
+
}
|
6407
|
+
case 'v': {
|
6408
|
+
parser->current.end++;
|
6409
|
+
pm_buffer_append_u8(buffer, '\v');
|
6410
|
+
return;
|
6411
|
+
}
|
6412
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
6413
|
+
uint8_t value = (uint8_t) (*parser->current.end - '0');
|
6414
|
+
parser->current.end++;
|
6415
|
+
|
6416
|
+
if (pm_char_is_octal_digit(peek(parser))) {
|
6417
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
|
6418
|
+
parser->current.end++;
|
6419
|
+
|
6420
|
+
if (pm_char_is_octal_digit(peek(parser))) {
|
6421
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
|
6422
|
+
parser->current.end++;
|
6423
|
+
}
|
6424
|
+
}
|
6425
|
+
|
6426
|
+
pm_buffer_append_u8(buffer, value);
|
6427
|
+
return;
|
6428
|
+
}
|
6429
|
+
case 'x': {
|
6430
|
+
const uint8_t *start = parser->current.end - 1;
|
6431
|
+
|
6432
|
+
parser->current.end++;
|
6433
|
+
uint8_t byte = peek(parser);
|
6434
|
+
|
6435
|
+
if (pm_char_is_hexadecimal_digit(byte)) {
|
6436
|
+
uint8_t value = escape_hexadecimal_digit(byte);
|
6437
|
+
parser->current.end++;
|
6438
|
+
|
6439
|
+
byte = peek(parser);
|
6440
|
+
if (pm_char_is_hexadecimal_digit(byte)) {
|
6441
|
+
value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
|
6442
|
+
parser->current.end++;
|
6443
|
+
}
|
6444
|
+
|
6445
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6446
|
+
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
|
6447
|
+
} else {
|
6448
|
+
pm_buffer_append_u8(buffer, value);
|
6449
|
+
}
|
6450
|
+
} else {
|
6451
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
6452
|
+
}
|
6453
|
+
|
6454
|
+
return;
|
6455
|
+
}
|
6456
|
+
case 'u': {
|
6457
|
+
const uint8_t *start = parser->current.end - 1;
|
6458
|
+
parser->current.end++;
|
6459
|
+
|
6460
|
+
if (
|
6461
|
+
(parser->current.end + 4 <= parser->end) &&
|
6462
|
+
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
6463
|
+
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
6464
|
+
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
6465
|
+
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
6466
|
+
) {
|
6467
|
+
uint32_t value = escape_unicode(parser->current.end, 4);
|
6468
|
+
|
6469
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6470
|
+
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
|
6471
|
+
} else {
|
6472
|
+
escape_write_unicode(parser, buffer, start, parser->current.end + 4, value);
|
6473
|
+
}
|
6474
|
+
|
6475
|
+
parser->current.end += 4;
|
6476
|
+
} else if (peek(parser) == '{') {
|
6477
|
+
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
6478
|
+
|
6479
|
+
parser->current.end++;
|
6480
|
+
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
6481
|
+
|
6482
|
+
const uint8_t *extra_codepoints_start = NULL;
|
6483
|
+
int codepoints_count = 0;
|
6484
|
+
|
6485
|
+
while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
|
6486
|
+
const uint8_t *unicode_start = parser->current.end;
|
6487
|
+
size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
|
6488
|
+
|
6489
|
+
if (hexadecimal_length > 6) {
|
6490
|
+
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
6491
|
+
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
6492
|
+
} else if (hexadecimal_length == 0) {
|
6493
|
+
// there are not hexadecimal characters
|
6494
|
+
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6495
|
+
return;
|
6496
|
+
}
|
6497
|
+
|
6498
|
+
parser->current.end += hexadecimal_length;
|
6499
|
+
codepoints_count++;
|
6500
|
+
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
|
6501
|
+
extra_codepoints_start = unicode_start;
|
6502
|
+
}
|
6503
|
+
|
6504
|
+
if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
|
6505
|
+
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
6506
|
+
escape_write_unicode(parser, buffer, unicode_start, parser->current.end, value);
|
6507
|
+
}
|
6508
|
+
|
6509
|
+
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
6510
|
+
}
|
6511
|
+
|
6512
|
+
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
6513
|
+
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
|
6514
|
+
pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
|
6515
|
+
}
|
6516
|
+
|
6517
|
+
if (peek(parser) == '}') {
|
6518
|
+
parser->current.end++;
|
6519
|
+
} else {
|
6520
|
+
pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
6521
|
+
}
|
6522
|
+
|
6523
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6524
|
+
pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
6525
|
+
}
|
6526
|
+
} else {
|
6527
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6528
|
+
}
|
6529
|
+
|
6530
|
+
return;
|
6531
|
+
}
|
6532
|
+
case 'c': {
|
6533
|
+
parser->current.end++;
|
6534
|
+
if (parser->current.end == parser->end) {
|
6535
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6536
|
+
return;
|
6537
|
+
}
|
6538
|
+
|
6539
|
+
uint8_t peeked = peek(parser);
|
6540
|
+
switch (peeked) {
|
6541
|
+
case '?': {
|
6542
|
+
parser->current.end++;
|
6543
|
+
escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
|
6544
|
+
return;
|
6545
|
+
}
|
6546
|
+
case '\\':
|
6547
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
6548
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
6549
|
+
return;
|
6550
|
+
}
|
6551
|
+
parser->current.end++;
|
6552
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
6553
|
+
return;
|
6554
|
+
default: {
|
6555
|
+
if (!char_is_ascii_printable(peeked)) {
|
6556
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6557
|
+
return;
|
6558
|
+
}
|
6559
|
+
|
6560
|
+
parser->current.end++;
|
6561
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
|
6562
|
+
return;
|
6563
|
+
}
|
6564
|
+
}
|
6565
|
+
}
|
6566
|
+
case 'C': {
|
6567
|
+
parser->current.end++;
|
6568
|
+
if (peek(parser) != '-') {
|
6569
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6570
|
+
return;
|
6571
|
+
}
|
6572
|
+
|
6573
|
+
parser->current.end++;
|
6574
|
+
if (parser->current.end == parser->end) {
|
6575
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6576
|
+
return;
|
6577
|
+
}
|
6578
|
+
|
6579
|
+
uint8_t peeked = peek(parser);
|
6580
|
+
switch (peeked) {
|
6581
|
+
case '?': {
|
6582
|
+
parser->current.end++;
|
6583
|
+
escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
|
6584
|
+
return;
|
6585
|
+
}
|
6586
|
+
case '\\':
|
6587
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
6588
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
6589
|
+
return;
|
6590
|
+
}
|
6591
|
+
parser->current.end++;
|
6592
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
6593
|
+
return;
|
6594
|
+
default: {
|
6595
|
+
if (!char_is_ascii_printable(peeked)) {
|
6596
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6597
|
+
return;
|
6598
|
+
}
|
6599
|
+
|
6600
|
+
parser->current.end++;
|
6601
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
|
6602
|
+
return;
|
6603
|
+
}
|
6604
|
+
}
|
6605
|
+
}
|
6606
|
+
case 'M': {
|
6607
|
+
parser->current.end++;
|
6608
|
+
if (peek(parser) != '-') {
|
6609
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6610
|
+
return;
|
6611
|
+
}
|
6612
|
+
|
6613
|
+
parser->current.end++;
|
6614
|
+
if (parser->current.end == parser->end) {
|
6615
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6616
|
+
return;
|
6617
|
+
}
|
6618
|
+
|
6619
|
+
uint8_t peeked = peek(parser);
|
6620
|
+
if (peeked == '\\') {
|
6621
|
+
if (flags & PM_ESCAPE_FLAG_META) {
|
6622
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
6623
|
+
return;
|
6624
|
+
}
|
6625
|
+
parser->current.end++;
|
6626
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
|
6627
|
+
return;
|
6628
|
+
}
|
6629
|
+
|
6630
|
+
if (!char_is_ascii_printable(peeked)) {
|
6631
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6632
|
+
return;
|
6633
|
+
}
|
6634
|
+
|
6635
|
+
parser->current.end++;
|
6636
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
|
6637
|
+
return;
|
6638
|
+
}
|
6639
|
+
case '\r': {
|
6640
|
+
if (peek_offset(parser, 1) == '\n') {
|
6641
|
+
parser->current.end += 2;
|
6642
|
+
pm_buffer_append_u8(buffer, '\n');
|
6643
|
+
return;
|
6644
|
+
}
|
6645
|
+
}
|
6646
|
+
/* fallthrough */
|
6647
|
+
default: {
|
6648
|
+
if (parser->current.end < parser->end) {
|
6649
|
+
pm_buffer_append_u8(buffer, *parser->current.end++);
|
6650
|
+
}
|
6651
|
+
return;
|
6652
|
+
}
|
6653
|
+
}
|
6654
|
+
}
|
6655
|
+
|
6656
|
+
// This function is responsible for lexing either a character literal or the ?
|
6657
|
+
// operator. The supported character literals are described below.
|
6658
|
+
//
|
6659
|
+
// \a bell, ASCII 07h (BEL)
|
6660
|
+
// \b backspace, ASCII 08h (BS)
|
6661
|
+
// \t horizontal tab, ASCII 09h (TAB)
|
6662
|
+
// \n newline (line feed), ASCII 0Ah (LF)
|
6663
|
+
// \v vertical tab, ASCII 0Bh (VT)
|
6664
|
+
// \f form feed, ASCII 0Ch (FF)
|
6665
|
+
// \r carriage return, ASCII 0Dh (CR)
|
6666
|
+
// \e escape, ASCII 1Bh (ESC)
|
6667
|
+
// \s space, ASCII 20h (SPC)
|
6668
|
+
// \\ backslash
|
6669
|
+
// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7])
|
6670
|
+
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
|
6671
|
+
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
6672
|
+
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
|
6673
|
+
// \cx or \C-x control character, where x is an ASCII printable character
|
6674
|
+
// \M-x meta character, where x is an ASCII printable character
|
6675
|
+
// \M-\C-x meta control character, where x is an ASCII printable character
|
6676
|
+
// \M-\cx same as above
|
6677
|
+
// \c\M-x same as above
|
6678
|
+
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
6679
|
+
//
|
6680
|
+
static pm_token_type_t
|
6681
|
+
lex_question_mark(pm_parser_t *parser) {
|
6682
|
+
if (lex_state_end_p(parser)) {
|
6683
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6684
|
+
return PM_TOKEN_QUESTION_MARK;
|
6685
|
+
}
|
6686
|
+
|
6687
|
+
if (parser->current.end >= parser->end) {
|
6688
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
|
6689
|
+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
|
6690
|
+
return PM_TOKEN_CHARACTER_LITERAL;
|
6691
|
+
}
|
6692
|
+
|
6693
|
+
if (pm_char_is_whitespace(*parser->current.end)) {
|
6694
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6695
|
+
return PM_TOKEN_QUESTION_MARK;
|
6696
|
+
}
|
6005
6697
|
|
6006
6698
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6007
6699
|
|
6008
|
-
if (parser
|
6700
|
+
if (match(parser, '\\')) {
|
6009
6701
|
lex_state_set(parser, PM_LEX_STATE_END);
|
6010
|
-
|
6702
|
+
|
6703
|
+
pm_buffer_t buffer;
|
6704
|
+
pm_buffer_init_capacity(&buffer, 3);
|
6705
|
+
|
6706
|
+
escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
|
6707
|
+
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
|
6708
|
+
|
6011
6709
|
return PM_TOKEN_CHARACTER_LITERAL;
|
6012
6710
|
} else {
|
6013
6711
|
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -6024,6 +6722,7 @@ lex_question_mark(pm_parser_t *parser) {
|
|
6024
6722
|
) {
|
6025
6723
|
lex_state_set(parser, PM_LEX_STATE_END);
|
6026
6724
|
parser->current.end += encoding_width;
|
6725
|
+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
|
6027
6726
|
return PM_TOKEN_CHARACTER_LITERAL;
|
6028
6727
|
}
|
6029
6728
|
}
|
@@ -6045,9 +6744,9 @@ lex_at_variable(pm_parser_t *parser) {
|
|
6045
6744
|
parser->current.end += width;
|
6046
6745
|
}
|
6047
6746
|
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
6048
|
-
|
6747
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
6049
6748
|
} else {
|
6050
|
-
|
6749
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
|
6051
6750
|
}
|
6052
6751
|
|
6053
6752
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -6070,7 +6769,7 @@ parser_lex_callback(pm_parser_t *parser) {
|
|
6070
6769
|
// Return a new comment node of the specified type.
|
6071
6770
|
static inline pm_comment_t *
|
6072
6771
|
parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
|
6073
|
-
pm_comment_t *comment = (pm_comment_t *)
|
6772
|
+
pm_comment_t *comment = (pm_comment_t *) calloc(sizeof(pm_comment_t), 1);
|
6074
6773
|
if (comment == NULL) return NULL;
|
6075
6774
|
|
6076
6775
|
*comment = (pm_comment_t) {
|
@@ -6146,7 +6845,7 @@ lex_embdoc(pm_parser_t *parser) {
|
|
6146
6845
|
parser_lex_callback(parser);
|
6147
6846
|
}
|
6148
6847
|
|
6149
|
-
|
6848
|
+
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
|
6150
6849
|
|
6151
6850
|
comment->end = parser->current.end;
|
6152
6851
|
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
|
@@ -6177,6 +6876,113 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
|
|
6177
6876
|
parser->heredoc_end = NULL;
|
6178
6877
|
}
|
6179
6878
|
|
6879
|
+
// When we're lexing certain types (strings, symbols, lists, etc.) we have
|
6880
|
+
// string content associated with the tokens. For example:
|
6881
|
+
//
|
6882
|
+
// "foo"
|
6883
|
+
//
|
6884
|
+
// In this case, the string content is foo. Since there is no escaping, there's
|
6885
|
+
// no need to track additional information and the token can be returned as
|
6886
|
+
// normal. However, if we have escape sequences:
|
6887
|
+
//
|
6888
|
+
// "foo\n"
|
6889
|
+
//
|
6890
|
+
// then the bytes in the string are "f", "o", "o", "\", "n", but we want to
|
6891
|
+
// provide our consumers with the string content "f", "o", "o", "\n". In these
|
6892
|
+
// cases, when we find the first escape sequence, we initialize a pm_buffer_t
|
6893
|
+
// to keep track of the string content. Then in the parser, it will
|
6894
|
+
// automatically attach the string content to the node that it belongs to.
|
6895
|
+
typedef struct {
|
6896
|
+
// Holds the unescaped content of the token being lexed. It is only
// initialized (by pm_token_buffer_escape) once the first escape sequence is
// encountered; until then it stays in its zero-initialized state.
pm_buffer_t buffer;
|
6897
|
+
// NULL while no escape sequence has been handled for the current token.
// Afterwards it points into the source at the first byte that has not yet
// been copied into buffer (the lexer sets it to the current token end after
// processing each escape).
const uint8_t *cursor;
|
6898
|
+
} pm_token_buffer_t;
|
6899
|
+
|
6900
|
+
// Push the given byte into the token buffer.
|
6901
|
+
static inline void
|
6902
|
+
pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
6903
|
+
pm_buffer_append_u8(&token_buffer->buffer, byte);
|
6904
|
+
}
|
6905
|
+
|
6906
|
+
// When we're about to return from lexing the current token and we know for sure
|
6907
|
+
// that we have found an escape sequence, this function is called to copy the
|
6908
|
+
// contents of the token buffer into the current string on the parser so that it
|
6909
|
+
// can be attached to the correct node.
|
6910
|
+
static inline void
|
6911
|
+
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6912
|
+
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
|
6913
|
+
}
|
6914
|
+
|
6915
|
+
// When we're about to return from lexing the current token, we need to flush
|
6916
|
+
// all of the content that we have pushed into the buffer into the current
|
6917
|
+
// string. If we haven't pushed anything into the buffer, this means that we
|
6918
|
+
// never found an escape sequence, so we can directly reference the bounds of
|
6919
|
+
// the current string. Either way, at the return of this function it is expected
|
6920
|
+
// that parser->current_string is established in such a way that it can be
|
6921
|
+
// attached to a node.
|
6922
|
+
static void
|
6923
|
+
pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6924
|
+
if (token_buffer->cursor == NULL) {
|
6925
|
+
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
|
6926
|
+
} else {
|
6927
|
+
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
|
6928
|
+
pm_token_buffer_copy(parser, token_buffer);
|
6929
|
+
}
|
6930
|
+
}
|
6931
|
+
|
6932
|
+
// When we've found an escape sequence, we need to copy everything up to this
|
6933
|
+
// point into the buffer because we're about to provide a string that has
|
6934
|
+
// different content than a direct slice of the source.
|
6935
|
+
//
|
6936
|
+
// It is expected that the parser's current token end will be pointing at one
|
6937
|
+
// byte past the backslash that starts the escape sequence.
|
6938
|
+
static void
|
6939
|
+
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6940
|
+
const uint8_t *start;
|
6941
|
+
if (token_buffer->cursor == NULL) {
|
6942
|
+
pm_buffer_init_capacity(&token_buffer->buffer, 16);
|
6943
|
+
start = parser->current.start;
|
6944
|
+
} else {
|
6945
|
+
start = token_buffer->cursor;
|
6946
|
+
}
|
6947
|
+
|
6948
|
+
const uint8_t *end = parser->current.end - 1;
|
6949
|
+
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
6950
|
+
}
|
6951
|
+
|
6952
|
+
// Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
|
6953
|
+
// a tilde heredoc expands out tab characters to the nearest tab boundaries.
|
6954
|
+
static inline size_t
|
6955
|
+
pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
|
6956
|
+
size_t whitespace = 0;
|
6957
|
+
|
6958
|
+
switch (indent) {
|
6959
|
+
case PM_HEREDOC_INDENT_NONE:
|
6960
|
+
// Do nothing, we can't match a terminator with
|
6961
|
+
// indentation and there's no need to calculate common
|
6962
|
+
// whitespace.
|
6963
|
+
break;
|
6964
|
+
case PM_HEREDOC_INDENT_DASH:
|
6965
|
+
// Skip past inline whitespace.
|
6966
|
+
*cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
|
6967
|
+
break;
|
6968
|
+
case PM_HEREDOC_INDENT_TILDE:
|
6969
|
+
// Skip past inline whitespace and calculate common
|
6970
|
+
// whitespace.
|
6971
|
+
while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
|
6972
|
+
if (**cursor == '\t') {
|
6973
|
+
whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
6974
|
+
} else {
|
6975
|
+
whitespace++;
|
6976
|
+
}
|
6977
|
+
(*cursor)++;
|
6978
|
+
}
|
6979
|
+
|
6980
|
+
break;
|
6981
|
+
}
|
6982
|
+
|
6983
|
+
return whitespace;
|
6984
|
+
}
|
6985
|
+
|
6180
6986
|
// This is a convenience macro that will set the current token type, call the
|
6181
6987
|
// lex callback, and then return from the parser_lex function.
|
6182
6988
|
#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
|
@@ -6301,12 +7107,16 @@ parser_lex(pm_parser_t *parser) {
|
|
6301
7107
|
parser->current.type = PM_TOKEN_COMMENT;
|
6302
7108
|
parser_lex_callback(parser);
|
6303
7109
|
|
6304
|
-
if
|
6305
|
-
|
6306
|
-
|
7110
|
+
// Here, parse the comment to see if it's a magic comment
|
7111
|
+
// and potentially change state on the parser.
|
7112
|
+
if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
|
7113
|
+
ptrdiff_t length = parser->current.end - parser->current.start;
|
6307
7114
|
|
6308
|
-
|
6309
|
-
|
7115
|
+
// If we didn't find a magic comment within the first
|
7116
|
+
// pass and we're at the start of the file, then we need
|
7117
|
+
// to do another pass to potentially find other patterns
|
7118
|
+
// for encoding comments.
|
7119
|
+
if (length >= 10) parser_lex_magic_comment_encoding(parser);
|
6310
7120
|
}
|
6311
7121
|
|
6312
7122
|
lexed_comment = true;
|
@@ -6588,7 +7398,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6588
7398
|
pm_token_type_t type = PM_TOKEN_STAR;
|
6589
7399
|
|
6590
7400
|
if (lex_state_spcarg_p(parser, space_seen)) {
|
6591
|
-
|
7401
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
|
6592
7402
|
type = PM_TOKEN_USTAR;
|
6593
7403
|
} else if (lex_state_beg_p(parser)) {
|
6594
7404
|
type = PM_TOKEN_USTAR;
|
@@ -6626,7 +7436,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6626
7436
|
|
6627
7437
|
// = => =~ == === =begin
|
6628
7438
|
case '=':
|
6629
|
-
if (current_token_starts_line(parser) &&
|
7439
|
+
if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
|
6630
7440
|
pm_token_type_t type = lex_embdoc(parser);
|
6631
7441
|
|
6632
7442
|
if (type == PM_TOKEN_EOF) {
|
@@ -6720,7 +7530,8 @@ parser_lex(pm_parser_t *parser) {
|
|
6720
7530
|
.ident_length = ident_length,
|
6721
7531
|
.next_start = parser->current.end,
|
6722
7532
|
.quote = quote,
|
6723
|
-
.indent = indent
|
7533
|
+
.indent = indent,
|
7534
|
+
.common_whitespace = (size_t) -1
|
6724
7535
|
}
|
6725
7536
|
});
|
6726
7537
|
|
@@ -6732,7 +7543,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6732
7543
|
// this is not a valid heredoc declaration. In this case we
|
6733
7544
|
// will add an error, but we will still return a heredoc
|
6734
7545
|
// start.
|
6735
|
-
|
7546
|
+
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
|
6736
7547
|
body_start = parser->end;
|
6737
7548
|
} else {
|
6738
7549
|
// Otherwise, we want to indicate that the body of the
|
@@ -6925,12 +7736,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6925
7736
|
|
6926
7737
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
6927
7738
|
if (spcarg) {
|
6928
|
-
|
6929
|
-
&parser->warning_list,
|
6930
|
-
parser->current.start,
|
6931
|
-
parser->current.end,
|
6932
|
-
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS
|
6933
|
-
);
|
7739
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
|
6934
7740
|
}
|
6935
7741
|
|
6936
7742
|
if (lex_state_beg_p(parser) || spcarg) {
|
@@ -6974,12 +7780,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6974
7780
|
|
6975
7781
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
6976
7782
|
if (spcarg) {
|
6977
|
-
|
6978
|
-
&parser->warning_list,
|
6979
|
-
parser->current.start,
|
6980
|
-
parser->current.end,
|
6981
|
-
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS
|
6982
|
-
);
|
7783
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
6983
7784
|
}
|
6984
7785
|
|
6985
7786
|
if (lex_state_beg_p(parser) || spcarg) {
|
@@ -7076,7 +7877,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7076
7877
|
}
|
7077
7878
|
|
7078
7879
|
if (lex_state_spcarg_p(parser, space_seen)) {
|
7079
|
-
|
7880
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
|
7080
7881
|
lex_mode_push_regexp(parser, '\0', '/');
|
7081
7882
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
7082
7883
|
}
|
@@ -7116,7 +7917,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7116
7917
|
// operator because we don't want to move into the string
|
7117
7918
|
// lex mode unnecessarily.
|
7118
7919
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
7119
|
-
|
7920
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7120
7921
|
LEX(PM_TOKEN_PERCENT);
|
7121
7922
|
}
|
7122
7923
|
|
@@ -7149,7 +7950,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7149
7950
|
// validate that here.
|
7150
7951
|
uint8_t delimiter = peek_offset(parser, 1);
|
7151
7952
|
if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
|
7152
|
-
|
7953
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7153
7954
|
goto lex_next_token;
|
7154
7955
|
}
|
7155
7956
|
|
@@ -7249,7 +8050,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7249
8050
|
// unparseable. In this case we'll just drop it from the parser
|
7250
8051
|
// and skip past it and hope that the next token is something
|
7251
8052
|
// that we can parse.
|
7252
|
-
|
8053
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7253
8054
|
goto lex_next_token;
|
7254
8055
|
}
|
7255
8056
|
}
|
@@ -7285,7 +8086,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7285
8086
|
// token as we've exhausted all of the other options. We'll skip past
|
7286
8087
|
// it and return the next token.
|
7287
8088
|
if (!width) {
|
7288
|
-
|
8089
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
|
7289
8090
|
goto lex_next_token;
|
7290
8091
|
}
|
7291
8092
|
|
@@ -7351,7 +8152,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7351
8152
|
}
|
7352
8153
|
}
|
7353
8154
|
}
|
7354
|
-
case PM_LEX_LIST:
|
8155
|
+
case PM_LEX_LIST: {
|
7355
8156
|
if (parser->next_start != NULL) {
|
7356
8157
|
parser->current.end = parser->next_start;
|
7357
8158
|
parser->next_start = NULL;
|
@@ -7394,6 +8195,10 @@ parser_lex(pm_parser_t *parser) {
|
|
7394
8195
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
7395
8196
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7396
8197
|
|
8198
|
+
// If we haven't found an escape yet, then this buffer will be
|
8199
|
+
// unallocated since we can refer directly to the source string.
|
8200
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8201
|
+
|
7397
8202
|
while (breakpoint != NULL) {
|
7398
8203
|
// If we hit a null byte, skip directly past it.
|
7399
8204
|
if (*breakpoint == '\0') {
|
@@ -7405,16 +8210,18 @@ parser_lex(pm_parser_t *parser) {
|
|
7405
8210
|
// now, so we can return an element of the list.
|
7406
8211
|
if (pm_char_is_whitespace(*breakpoint)) {
|
7407
8212
|
parser->current.end = breakpoint;
|
8213
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7408
8214
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7409
8215
|
}
|
7410
8216
|
|
7411
|
-
//If we hit the terminator, we need to check which token to
|
8217
|
+
// If we hit the terminator, we need to check which token to
|
7412
8218
|
// return.
|
7413
8219
|
if (*breakpoint == lex_mode->as.list.terminator) {
|
7414
8220
|
// If this terminator doesn't actually close the list, then
|
7415
8221
|
// we need to continue on past it.
|
7416
8222
|
if (lex_mode->as.list.nesting > 0) {
|
7417
|
-
|
8223
|
+
parser->current.end = breakpoint + 1;
|
8224
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7418
8225
|
lex_mode->as.list.nesting--;
|
7419
8226
|
continue;
|
7420
8227
|
}
|
@@ -7423,6 +8230,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7423
8230
|
// past content, then we can return a list node.
|
7424
8231
|
if (breakpoint > parser->current.start) {
|
7425
8232
|
parser->current.end = breakpoint;
|
8233
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7426
8234
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7427
8235
|
}
|
7428
8236
|
|
@@ -7438,59 +8246,109 @@ parser_lex(pm_parser_t *parser) {
|
|
7438
8246
|
// literally. In this case we'll skip past the next character
|
7439
8247
|
// and find the next breakpoint.
|
7440
8248
|
if (*breakpoint == '\\') {
|
7441
|
-
|
7442
|
-
|
7443
|
-
|
7444
|
-
|
8249
|
+
parser->current.end = breakpoint + 1;
|
8250
|
+
|
8251
|
+
// If we've hit the end of the file, then break out of the
|
8252
|
+
// loop by setting the breakpoint to NULL.
|
8253
|
+
if (parser->current.end == parser->end) {
|
7445
8254
|
breakpoint = NULL;
|
7446
8255
|
continue;
|
7447
8256
|
}
|
7448
8257
|
|
7449
|
-
|
7450
|
-
|
7451
|
-
|
7452
|
-
|
7453
|
-
|
7454
|
-
|
7455
|
-
|
7456
|
-
|
7457
|
-
|
7458
|
-
|
7459
|
-
|
7460
|
-
|
8258
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8259
|
+
uint8_t peeked = peek(parser);
|
8260
|
+
|
8261
|
+
switch (peeked) {
|
8262
|
+
case ' ':
|
8263
|
+
case '\f':
|
8264
|
+
case '\t':
|
8265
|
+
case '\v':
|
8266
|
+
case '\\':
|
8267
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8268
|
+
parser->current.end++;
|
8269
|
+
break;
|
8270
|
+
case '\r':
|
8271
|
+
parser->current.end++;
|
8272
|
+
if (peek(parser) != '\n') {
|
8273
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8274
|
+
break;
|
8275
|
+
}
|
8276
|
+
/* fallthrough */
|
8277
|
+
case '\n':
|
8278
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8279
|
+
|
8280
|
+
if (parser->heredoc_end) {
|
8281
|
+
// ... if we are on the same line as a heredoc,
|
8282
|
+
// flush the heredoc and continue parsing after
|
8283
|
+
// heredoc_end.
|
8284
|
+
parser_flush_heredoc_end(parser);
|
8285
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8286
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8287
|
+
} else {
|
8288
|
+
// ... else track the newline.
|
8289
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8290
|
+
}
|
8291
|
+
|
8292
|
+
parser->current.end++;
|
8293
|
+
break;
|
8294
|
+
default:
|
8295
|
+
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
8296
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8297
|
+
parser->current.end++;
|
8298
|
+
} else if (lex_mode->as.list.interpolation) {
|
8299
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8300
|
+
} else {
|
8301
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8302
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8303
|
+
parser->current.end++;
|
8304
|
+
}
|
8305
|
+
|
8306
|
+
break;
|
7461
8307
|
}
|
7462
8308
|
|
7463
|
-
|
8309
|
+
token_buffer.cursor = parser->current.end;
|
8310
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7464
8311
|
continue;
|
7465
8312
|
}
|
7466
8313
|
|
7467
8314
|
// If we hit a #, then we will attempt to lex interpolation.
|
7468
8315
|
if (*breakpoint == '#') {
|
7469
8316
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7470
|
-
|
7471
|
-
|
8317
|
+
|
8318
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8319
|
+
// If we haven't returned at this point then we had something
|
8320
|
+
// that looked like an interpolated class or instance variable
|
8321
|
+
// like "#@" but wasn't actually. In this case we'll just skip
|
8322
|
+
// to the next breakpoint.
|
8323
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8324
|
+
continue;
|
7472
8325
|
}
|
7473
8326
|
|
7474
|
-
|
7475
|
-
|
7476
|
-
|
7477
|
-
|
7478
|
-
|
7479
|
-
continue;
|
8327
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8328
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8329
|
+
}
|
8330
|
+
|
8331
|
+
LEX(type);
|
7480
8332
|
}
|
7481
8333
|
|
7482
8334
|
// If we've hit the incrementor, then we need to skip past it
|
7483
8335
|
// and find the next breakpoint.
|
7484
8336
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
7485
|
-
|
8337
|
+
parser->current.end = breakpoint + 1;
|
8338
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7486
8339
|
lex_mode->as.list.nesting++;
|
7487
8340
|
continue;
|
7488
8341
|
}
|
7489
8342
|
|
7490
|
-
|
7491
|
-
|
7492
|
-
|
8343
|
+
if (parser->current.end > parser->current.start) {
|
8344
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8345
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8346
|
+
}
|
7493
8347
|
|
8348
|
+
// If we were unable to find a breakpoint, then this token hits the
|
8349
|
+
// end of the file.
|
8350
|
+
LEX(PM_TOKEN_EOF);
|
8351
|
+
}
|
7494
8352
|
case PM_LEX_REGEXP: {
|
7495
8353
|
// First, we'll set to start of this token to be the current end.
|
7496
8354
|
if (parser->next_start == NULL) {
|
@@ -7515,11 +8373,13 @@ parser_lex(pm_parser_t *parser) {
|
|
7515
8373
|
// characters.
|
7516
8374
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
7517
8375
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8376
|
+
pm_token_buffer_t token_buffer = { 0 };
|
7518
8377
|
|
7519
8378
|
while (breakpoint != NULL) {
|
7520
8379
|
// If we hit a null byte, skip directly past it.
|
7521
8380
|
if (*breakpoint == '\0') {
|
7522
|
-
|
8381
|
+
parser->current.end = breakpoint + 1;
|
8382
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7523
8383
|
continue;
|
7524
8384
|
}
|
7525
8385
|
|
@@ -7540,7 +8400,8 @@ parser_lex(pm_parser_t *parser) {
|
|
7540
8400
|
if (lex_mode->as.regexp.terminator != '\n') {
|
7541
8401
|
// If the terminator is not a newline, then we can set
|
7542
8402
|
// the next breakpoint and continue.
|
7543
|
-
|
8403
|
+
parser->current.end = breakpoint + 1;
|
8404
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7544
8405
|
continue;
|
7545
8406
|
}
|
7546
8407
|
}
|
@@ -7549,7 +8410,8 @@ parser_lex(pm_parser_t *parser) {
|
|
7549
8410
|
// token to return.
|
7550
8411
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
7551
8412
|
if (lex_mode->as.regexp.nesting > 0) {
|
7552
|
-
|
8413
|
+
parser->current.end = breakpoint + 1;
|
8414
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7553
8415
|
lex_mode->as.regexp.nesting--;
|
7554
8416
|
continue;
|
7555
8417
|
}
|
@@ -7559,11 +8421,12 @@ parser_lex(pm_parser_t *parser) {
|
|
7559
8421
|
// first.
|
7560
8422
|
if (breakpoint > parser->current.start) {
|
7561
8423
|
parser->current.end = breakpoint;
|
8424
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7562
8425
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7563
8426
|
}
|
7564
8427
|
|
7565
|
-
// Since we've hit the terminator of the regular expression,
|
7566
|
-
// need to parse the options.
|
8428
|
+
// Since we've hit the terminator of the regular expression,
|
8429
|
+
// we now need to parse the options.
|
7567
8430
|
parser->current.end = breakpoint + 1;
|
7568
8431
|
parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
|
7569
8432
|
|
@@ -7576,56 +8439,103 @@ parser_lex(pm_parser_t *parser) {
|
|
7576
8439
|
// literally. In this case we'll skip past the next character
|
7577
8440
|
// and find the next breakpoint.
|
7578
8441
|
if (*breakpoint == '\\') {
|
7579
|
-
|
7580
|
-
|
7581
|
-
|
8442
|
+
parser->current.end = breakpoint + 1;
|
8443
|
+
|
8444
|
+
// If we've hit the end of the file, then break out of the
|
8445
|
+
// loop by setting the breakpoint to NULL.
|
8446
|
+
if (parser->current.end == parser->end) {
|
7582
8447
|
breakpoint = NULL;
|
7583
8448
|
continue;
|
7584
8449
|
}
|
7585
8450
|
|
7586
|
-
|
7587
|
-
|
7588
|
-
|
7589
|
-
|
7590
|
-
|
7591
|
-
parser->current.end
|
7592
|
-
|
7593
|
-
|
7594
|
-
|
7595
|
-
|
7596
|
-
|
7597
|
-
|
8451
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8452
|
+
uint8_t peeked = peek(parser);
|
8453
|
+
|
8454
|
+
switch (peeked) {
|
8455
|
+
case '\r':
|
8456
|
+
parser->current.end++;
|
8457
|
+
if (peek(parser) != '\n') {
|
8458
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8459
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8460
|
+
break;
|
8461
|
+
}
|
8462
|
+
/* fallthrough */
|
8463
|
+
case '\n':
|
8464
|
+
if (parser->heredoc_end) {
|
8465
|
+
// ... if we are on the same line as a heredoc,
|
8466
|
+
// flush the heredoc and continue parsing after
|
8467
|
+
// heredoc_end.
|
8468
|
+
parser_flush_heredoc_end(parser);
|
8469
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8470
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8471
|
+
} else {
|
8472
|
+
// ... else track the newline.
|
8473
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8474
|
+
}
|
8475
|
+
|
8476
|
+
parser->current.end++;
|
8477
|
+
break;
|
8478
|
+
case 'c':
|
8479
|
+
case 'C':
|
8480
|
+
case 'M':
|
8481
|
+
case 'u':
|
8482
|
+
case 'x':
|
8483
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
|
8484
|
+
break;
|
8485
|
+
default:
|
8486
|
+
if (lex_mode->as.regexp.terminator == '/' && peeked == '/') {
|
8487
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8488
|
+
parser->current.end++;
|
8489
|
+
break;
|
8490
|
+
}
|
8491
|
+
|
8492
|
+
if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
|
8493
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8494
|
+
parser->current.end++;
|
8495
|
+
break;
|
7598
8496
|
}
|
7599
8497
|
|
7600
|
-
|
8498
|
+
token_buffer.cursor = parser->current.end;
|
8499
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7601
8500
|
continue;
|
7602
8501
|
}
|
7603
8502
|
|
7604
8503
|
// If we hit a #, then we will attempt to lex interpolation.
|
7605
8504
|
if (*breakpoint == '#') {
|
7606
8505
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7607
|
-
|
7608
|
-
|
8506
|
+
|
8507
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8508
|
+
// If we haven't returned at this point then we had
|
8509
|
+
// something that looked like an interpolated class or
|
8510
|
+
// instance variable like "#@" but wasn't actually. In
|
8511
|
+
// this case we'll just skip to the next breakpoint.
|
8512
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8513
|
+
continue;
|
7609
8514
|
}
|
7610
8515
|
|
7611
|
-
|
7612
|
-
|
7613
|
-
|
7614
|
-
|
7615
|
-
|
7616
|
-
continue;
|
8516
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8517
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8518
|
+
}
|
8519
|
+
|
8520
|
+
LEX(type);
|
7617
8521
|
}
|
7618
8522
|
|
7619
8523
|
// If we've hit the incrementor, then we need to skip past it
|
7620
8524
|
// and find the next breakpoint.
|
7621
8525
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
7622
|
-
|
8526
|
+
parser->current.end = breakpoint + 1;
|
8527
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7623
8528
|
lex_mode->as.regexp.nesting++;
|
7624
8529
|
continue;
|
7625
8530
|
}
|
7626
8531
|
|
7627
|
-
|
7628
|
-
|
8532
|
+
if (parser->current.end > parser->current.start) {
|
8533
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8534
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8535
|
+
}
|
8536
|
+
|
8537
|
+
// If we were unable to find a breakpoint, then this token hits the
|
8538
|
+
// end of the file.
|
7629
8539
|
LEX(PM_TOKEN_EOF);
|
7630
8540
|
}
|
7631
8541
|
case PM_LEX_STRING: {
|
@@ -7646,30 +8556,34 @@ parser_lex(pm_parser_t *parser) {
|
|
7646
8556
|
|
7647
8557
|
// These are the places where we need to split up the content of the
|
7648
8558
|
// string. We'll use strpbrk to find the first of these characters.
|
7649
|
-
|
8559
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
8560
|
+
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
7650
8561
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7651
8562
|
|
8563
|
+
// If we haven't found an escape yet, then this buffer will be
|
8564
|
+
// unallocated since we can refer directly to the source string.
|
8565
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8566
|
+
|
7652
8567
|
while (breakpoint != NULL) {
|
7653
8568
|
// If we hit the incrementor, then we'll increment the nesting and
|
7654
8569
|
// continue lexing.
|
7655
|
-
if (
|
7656
|
-
|
7657
|
-
|
7658
|
-
|
7659
|
-
parser->lex_modes.current->as.string.nesting++;
|
7660
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
8570
|
+
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
8571
|
+
lex_mode->as.string.nesting++;
|
8572
|
+
parser->current.end = breakpoint + 1;
|
8573
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7661
8574
|
continue;
|
7662
8575
|
}
|
7663
8576
|
|
7664
8577
|
// Note that we have to check the terminator here first because we could
|
7665
8578
|
// potentially be parsing a % string that has a # character as the
|
7666
8579
|
// terminator.
|
7667
|
-
if (*breakpoint ==
|
8580
|
+
if (*breakpoint == lex_mode->as.string.terminator) {
|
7668
8581
|
// If this terminator doesn't actually close the string, then we need
|
7669
8582
|
// to continue on past it.
|
7670
|
-
if (
|
7671
|
-
|
7672
|
-
parser->
|
8583
|
+
if (lex_mode->as.string.nesting > 0) {
|
8584
|
+
parser->current.end = breakpoint + 1;
|
8585
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8586
|
+
lex_mode->as.string.nesting--;
|
7673
8587
|
continue;
|
7674
8588
|
}
|
7675
8589
|
|
@@ -7677,6 +8591,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7677
8591
|
// then we need to return that content as string content first.
|
7678
8592
|
if (breakpoint > parser->current.start) {
|
7679
8593
|
parser->current.end = breakpoint;
|
8594
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7680
8595
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7681
8596
|
}
|
7682
8597
|
|
@@ -7690,11 +8605,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7690
8605
|
parser->current.end = breakpoint + 1;
|
7691
8606
|
}
|
7692
8607
|
|
7693
|
-
if (
|
7694
|
-
parser->lex_modes.current->as.string.label_allowed &&
|
7695
|
-
(peek(parser) == ':') &&
|
7696
|
-
(peek_offset(parser, 1) != ':')
|
7697
|
-
) {
|
8608
|
+
if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
|
7698
8609
|
parser->current.end++;
|
7699
8610
|
lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
7700
8611
|
lex_mode_pop(parser);
|
@@ -7712,11 +8623,13 @@ parser_lex(pm_parser_t *parser) {
|
|
7712
8623
|
if (*breakpoint == '\n') {
|
7713
8624
|
if (parser->heredoc_end == NULL) {
|
7714
8625
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
7715
|
-
|
8626
|
+
parser->current.end = breakpoint + 1;
|
8627
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7716
8628
|
continue;
|
7717
8629
|
} else {
|
7718
8630
|
parser->current.end = breakpoint + 1;
|
7719
8631
|
parser_flush_heredoc_end(parser);
|
8632
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7720
8633
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7721
8634
|
}
|
7722
8635
|
}
|
@@ -7724,58 +8637,110 @@ parser_lex(pm_parser_t *parser) {
|
|
7724
8637
|
switch (*breakpoint) {
|
7725
8638
|
case '\0':
|
7726
8639
|
// Skip directly past the null character.
|
7727
|
-
|
8640
|
+
parser->current.end = breakpoint + 1;
|
8641
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7728
8642
|
break;
|
7729
8643
|
case '\\': {
|
7730
|
-
//
|
7731
|
-
|
7732
|
-
|
7733
|
-
|
7734
|
-
|
7735
|
-
if (
|
7736
|
-
// we're at the end of the file
|
8644
|
+
// Here we hit escapes.
|
8645
|
+
parser->current.end = breakpoint + 1;
|
8646
|
+
|
8647
|
+
// If we've hit the end of the file, then break out of
|
8648
|
+
// the loop by setting the breakpoint to NULL.
|
8649
|
+
if (parser->current.end == parser->end) {
|
7737
8650
|
breakpoint = NULL;
|
7738
|
-
|
8651
|
+
continue;
|
7739
8652
|
}
|
7740
8653
|
|
7741
|
-
|
7742
|
-
|
7743
|
-
|
7744
|
-
|
7745
|
-
|
7746
|
-
|
7747
|
-
|
7748
|
-
|
7749
|
-
|
7750
|
-
|
7751
|
-
|
7752
|
-
|
8654
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8655
|
+
uint8_t peeked = peek(parser);
|
8656
|
+
|
8657
|
+
switch (peeked) {
|
8658
|
+
case '\\':
|
8659
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8660
|
+
parser->current.end++;
|
8661
|
+
break;
|
8662
|
+
case '\r':
|
8663
|
+
parser->current.end++;
|
8664
|
+
if (peek(parser) != '\n') {
|
8665
|
+
if (!lex_mode->as.string.interpolation) {
|
8666
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8667
|
+
}
|
8668
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8669
|
+
break;
|
8670
|
+
}
|
8671
|
+
/* fallthrough */
|
8672
|
+
case '\n':
|
8673
|
+
if (!lex_mode->as.string.interpolation) {
|
8674
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8675
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8676
|
+
}
|
8677
|
+
|
8678
|
+
if (parser->heredoc_end) {
|
8679
|
+
// ... if we are on the same line as a heredoc,
|
8680
|
+
// flush the heredoc and continue parsing after
|
8681
|
+
// heredoc_end.
|
8682
|
+
parser_flush_heredoc_end(parser);
|
8683
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8684
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8685
|
+
} else {
|
8686
|
+
// ... else track the newline.
|
8687
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8688
|
+
}
|
8689
|
+
|
8690
|
+
parser->current.end++;
|
8691
|
+
break;
|
8692
|
+
default:
|
8693
|
+
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
8694
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8695
|
+
parser->current.end++;
|
8696
|
+
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
8697
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8698
|
+
parser->current.end++;
|
8699
|
+
} else if (lex_mode->as.string.interpolation) {
|
8700
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8701
|
+
} else {
|
8702
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8703
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8704
|
+
parser->current.end++;
|
8705
|
+
}
|
8706
|
+
|
8707
|
+
break;
|
7753
8708
|
}
|
7754
8709
|
|
7755
|
-
|
8710
|
+
token_buffer.cursor = parser->current.end;
|
8711
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7756
8712
|
break;
|
7757
8713
|
}
|
7758
8714
|
case '#': {
|
7759
8715
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7760
|
-
|
7761
|
-
|
8716
|
+
|
8717
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8718
|
+
// If we haven't returned at this point then we had something that
|
8719
|
+
// looked like an interpolated class or instance variable like "#@"
|
8720
|
+
// but wasn't actually. In this case we'll just skip to the next
|
8721
|
+
// breakpoint.
|
8722
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8723
|
+
break;
|
7762
8724
|
}
|
7763
8725
|
|
7764
|
-
|
7765
|
-
|
7766
|
-
|
7767
|
-
|
7768
|
-
|
7769
|
-
break;
|
8726
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8727
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8728
|
+
}
|
8729
|
+
|
8730
|
+
LEX(type);
|
7770
8731
|
}
|
7771
8732
|
default:
|
7772
8733
|
assert(false && "unreachable");
|
7773
8734
|
}
|
7774
8735
|
}
|
7775
8736
|
|
8737
|
+
if (parser->current.end > parser->current.start) {
|
8738
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8739
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8740
|
+
}
|
8741
|
+
|
7776
8742
|
// If we've hit the end of the string, then this is an unterminated
|
7777
8743
|
// string. In that case we'll return the EOF token.
|
7778
|
-
parser->current.end = parser->end;
|
7779
8744
|
LEX(PM_TOKEN_EOF);
|
7780
8745
|
}
|
7781
8746
|
case PM_LEX_HEREDOC: {
|
@@ -7797,16 +8762,15 @@ parser_lex(pm_parser_t *parser) {
|
|
7797
8762
|
|
7798
8763
|
// Now let's grab the information about the identifier off of the current
|
7799
8764
|
// lex mode.
|
7800
|
-
|
7801
|
-
|
8765
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
8766
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
8767
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
7802
8768
|
|
7803
8769
|
// If we are immediately following a newline and we have hit the
|
7804
8770
|
// terminator, then we need to return the ending of the heredoc.
|
7805
8771
|
if (current_token_starts_line(parser)) {
|
7806
8772
|
const uint8_t *start = parser->current.start;
|
7807
|
-
|
7808
|
-
start += pm_strspn_inline_whitespace(start, parser->end - start);
|
7809
|
-
}
|
8773
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
7810
8774
|
|
7811
8775
|
if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
|
7812
8776
|
bool matched = true;
|
@@ -7824,10 +8788,10 @@ parser_lex(pm_parser_t *parser) {
|
|
7824
8788
|
}
|
7825
8789
|
|
7826
8790
|
if (matched) {
|
7827
|
-
if (*
|
8791
|
+
if (*lex_mode->as.heredoc.next_start == '\\') {
|
7828
8792
|
parser->next_start = NULL;
|
7829
8793
|
} else {
|
7830
|
-
parser->next_start =
|
8794
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
7831
8795
|
parser->heredoc_end = parser->current.end;
|
7832
8796
|
}
|
7833
8797
|
|
@@ -7838,61 +8802,91 @@ parser_lex(pm_parser_t *parser) {
|
|
7838
8802
|
LEX(PM_TOKEN_HEREDOC_END);
|
7839
8803
|
}
|
7840
8804
|
}
|
8805
|
+
|
8806
|
+
if (
|
8807
|
+
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
|
8808
|
+
(lex_mode->as.heredoc.common_whitespace > whitespace) &&
|
8809
|
+
peek_at(parser, start) != '\n'
|
8810
|
+
) {
|
8811
|
+
lex_mode->as.heredoc.common_whitespace = whitespace;
|
8812
|
+
}
|
7841
8813
|
}
|
7842
8814
|
|
7843
|
-
// Otherwise we'll be parsing string content. These are the places
|
7844
|
-
// we need to split up the content of the heredoc. We'll use
|
7845
|
-
// find the first of these characters.
|
8815
|
+
// Otherwise we'll be parsing string content. These are the places
|
8816
|
+
// where we need to split up the content of the heredoc. We'll use
|
8817
|
+
// strpbrk to find the first of these characters.
|
7846
8818
|
uint8_t breakpoints[] = "\n\\#";
|
7847
8819
|
|
7848
|
-
pm_heredoc_quote_t quote =
|
8820
|
+
pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
|
7849
8821
|
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
7850
8822
|
breakpoints[2] = '\0';
|
7851
8823
|
}
|
7852
8824
|
|
7853
8825
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8826
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8827
|
+
bool was_escaped_newline = false;
|
7854
8828
|
|
7855
8829
|
while (breakpoint != NULL) {
|
7856
8830
|
switch (*breakpoint) {
|
7857
8831
|
case '\0':
|
7858
8832
|
// Skip directly past the null character.
|
7859
|
-
|
8833
|
+
parser->current.end = breakpoint + 1;
|
8834
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7860
8835
|
break;
|
7861
8836
|
case '\n': {
|
7862
8837
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
7863
8838
|
parser_flush_heredoc_end(parser);
|
7864
8839
|
parser->current.end = breakpoint + 1;
|
8840
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7865
8841
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7866
8842
|
}
|
7867
8843
|
|
7868
8844
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
7869
8845
|
|
8846
|
+
// If we have a - or ~ heredoc, then we can match after
|
8847
|
+
// some leading whitespace.
|
7870
8848
|
const uint8_t *start = breakpoint + 1;
|
7871
|
-
|
7872
|
-
start += pm_strspn_inline_whitespace(start, parser->end - start);
|
7873
|
-
}
|
8849
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
7874
8850
|
|
7875
|
-
// If we have hit a newline that is followed by a valid
|
7876
|
-
// then we need to return the content of the
|
7877
|
-
// content. Then, the next time a
|
7878
|
-
// again and return the
|
8851
|
+
// If we have hit a newline that is followed by a valid
|
8852
|
+
// terminator, then we need to return the content of the
|
8853
|
+
// heredoc here as string content. Then, the next time a
|
8854
|
+
// token is lexed, it will match again and return the
|
8855
|
+
// end of the heredoc.
|
7879
8856
|
if (
|
8857
|
+
!was_escaped_newline &&
|
7880
8858
|
(start + ident_length <= parser->end) &&
|
7881
8859
|
(memcmp(start, ident_start, ident_length) == 0)
|
7882
8860
|
) {
|
7883
|
-
// Heredoc terminators must be followed by a
|
8861
|
+
// Heredoc terminators must be followed by a
|
8862
|
+
// newline, CRLF, or EOF to be valid.
|
7884
8863
|
if (
|
7885
8864
|
start + ident_length == parser->end ||
|
7886
8865
|
match_eol_at(parser, start + ident_length)
|
7887
8866
|
) {
|
7888
8867
|
parser->current.end = breakpoint + 1;
|
8868
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8869
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8870
|
+
}
|
8871
|
+
}
|
8872
|
+
|
8873
|
+
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
8874
|
+
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
8875
|
+
lex_mode->as.heredoc.common_whitespace = whitespace;
|
8876
|
+
}
|
8877
|
+
|
8878
|
+
parser->current.end = breakpoint + 1;
|
8879
|
+
|
8880
|
+
if (!was_escaped_newline) {
|
8881
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7889
8882
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7890
8883
|
}
|
7891
8884
|
}
|
7892
8885
|
|
7893
|
-
// Otherwise we hit a newline and it wasn't followed by
|
7894
|
-
// terminator, so we can continue parsing.
|
7895
|
-
|
8886
|
+
// Otherwise we hit a newline and it wasn't followed by
|
8887
|
+
// a terminator, so we can continue parsing.
|
8888
|
+
parser->current.end = breakpoint + 1;
|
8889
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7896
8890
|
break;
|
7897
8891
|
}
|
7898
8892
|
case '\\': {
|
@@ -7902,46 +8896,98 @@ parser_lex(pm_parser_t *parser) {
|
|
7902
8896
|
// stop looping before the newline and not after the
|
7903
8897
|
// newline so that we can still potentially find the
|
7904
8898
|
// terminator of the heredoc.
|
7905
|
-
|
7906
|
-
|
7907
|
-
|
7908
|
-
|
7909
|
-
|
7910
|
-
|
7911
|
-
|
7912
|
-
|
7913
|
-
breakpoint = NULL;
|
7914
|
-
break;
|
7915
|
-
}
|
8899
|
+
parser->current.end = breakpoint + 1;
|
8900
|
+
|
8901
|
+
// If we've hit the end of the file, then break out of
|
8902
|
+
// the loop by setting the breakpoint to NULL.
|
8903
|
+
if (parser->current.end == parser->end) {
|
8904
|
+
breakpoint = NULL;
|
8905
|
+
continue;
|
8906
|
+
}
|
7916
8907
|
|
7917
|
-
|
8908
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8909
|
+
uint8_t peeked = peek(parser);
|
7918
8910
|
|
7919
|
-
|
8911
|
+
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
8912
|
+
switch (peeked) {
|
8913
|
+
case '\r':
|
8914
|
+
parser->current.end++;
|
8915
|
+
if (peek(parser) != '\n') {
|
8916
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8917
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8918
|
+
break;
|
8919
|
+
}
|
8920
|
+
/* fallthrough */
|
8921
|
+
case '\n':
|
8922
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8923
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8924
|
+
token_buffer.cursor = parser->current.end + 1;
|
8925
|
+
breakpoint = parser->current.end;
|
8926
|
+
continue;
|
8927
|
+
default:
|
8928
|
+
parser->current.end++;
|
8929
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8930
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8931
|
+
break;
|
8932
|
+
}
|
8933
|
+
} else {
|
8934
|
+
switch (peeked) {
|
8935
|
+
case '\r':
|
8936
|
+
parser->current.end++;
|
8937
|
+
if (peek(parser) != '\n') {
|
8938
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8939
|
+
break;
|
8940
|
+
}
|
8941
|
+
/* fallthrough */
|
8942
|
+
case '\n':
|
8943
|
+
was_escaped_newline = true;
|
8944
|
+
token_buffer.cursor = parser->current.end + 1;
|
8945
|
+
breakpoint = parser->current.end;
|
8946
|
+
continue;
|
8947
|
+
default:
|
8948
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8949
|
+
break;
|
8950
|
+
}
|
7920
8951
|
}
|
7921
8952
|
|
8953
|
+
token_buffer.cursor = parser->current.end;
|
8954
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7922
8955
|
break;
|
7923
8956
|
}
|
7924
8957
|
case '#': {
|
7925
8958
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7926
|
-
|
7927
|
-
|
8959
|
+
|
8960
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8961
|
+
// If we haven't returned at this point then we had
|
8962
|
+
// something that looked like an interpolated class
|
8963
|
+
// or instance variable like "#@" but wasn't
|
8964
|
+
// actually. In this case we'll just skip to the
|
8965
|
+
// next breakpoint.
|
8966
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8967
|
+
break;
|
7928
8968
|
}
|
7929
8969
|
|
7930
|
-
|
7931
|
-
|
7932
|
-
|
7933
|
-
|
7934
|
-
|
7935
|
-
break;
|
8970
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8971
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8972
|
+
}
|
8973
|
+
|
8974
|
+
LEX(type);
|
7936
8975
|
}
|
7937
8976
|
default:
|
7938
8977
|
assert(false && "unreachable");
|
7939
8978
|
}
|
8979
|
+
|
8980
|
+
was_escaped_newline = false;
|
8981
|
+
}
|
8982
|
+
|
8983
|
+
if (parser->current.end > parser->current.start) {
|
8984
|
+
parser->current.end = parser->end;
|
8985
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8986
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
7940
8987
|
}
|
7941
8988
|
|
7942
8989
|
// If we've hit the end of the string, then this is an unterminated
|
7943
8990
|
// heredoc. In that case we'll return the EOF token.
|
7944
|
-
parser->current.end = parser->end;
|
7945
8991
|
LEX(PM_TOKEN_EOF);
|
7946
8992
|
}
|
7947
8993
|
}
|
@@ -7955,67 +9001,6 @@ parser_lex(pm_parser_t *parser) {
|
|
7955
9001
|
/* Parse functions */
|
7956
9002
|
/******************************************************************************/
|
7957
9003
|
|
7958
|
-
// When we are parsing certain content, we need to unescape the content to
|
7959
|
-
// provide to the consumers of the parser. The following functions accept a range
|
7960
|
-
// of characters from the source and unescapes into the provided type.
|
7961
|
-
//
|
7962
|
-
// We have functions for unescaping regular expression nodes, string nodes,
|
7963
|
-
// symbol nodes, and xstring nodes
|
7964
|
-
static pm_regular_expression_node_t *
|
7965
|
-
pm_regular_expression_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7966
|
-
pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, opening, content, closing);
|
7967
|
-
|
7968
|
-
assert((content->end - content->start) >= 0);
|
7969
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7970
|
-
|
7971
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
7972
|
-
return node;
|
7973
|
-
}
|
7974
|
-
|
7975
|
-
static pm_symbol_node_t *
|
7976
|
-
pm_symbol_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7977
|
-
pm_symbol_node_t *node = pm_symbol_node_create(parser, opening, content, closing);
|
7978
|
-
|
7979
|
-
assert((content->end - content->start) >= 0);
|
7980
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7981
|
-
|
7982
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
7983
|
-
return node;
|
7984
|
-
}
|
7985
|
-
|
7986
|
-
static pm_string_node_t *
|
7987
|
-
pm_char_literal_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7988
|
-
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
|
7989
|
-
|
7990
|
-
assert((content->end - content->start) >= 0);
|
7991
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7992
|
-
|
7993
|
-
pm_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
|
7994
|
-
return node;
|
7995
|
-
}
|
7996
|
-
|
7997
|
-
static pm_string_node_t *
|
7998
|
-
pm_string_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7999
|
-
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
|
8000
|
-
|
8001
|
-
assert((content->end - content->start) >= 0);
|
8002
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
8003
|
-
|
8004
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
8005
|
-
return node;
|
8006
|
-
}
|
8007
|
-
|
8008
|
-
static pm_x_string_node_t *
|
8009
|
-
pm_xstring_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
8010
|
-
pm_x_string_node_t *node = pm_xstring_node_create(parser, opening, content, closing);
|
8011
|
-
|
8012
|
-
assert((content->end - content->start) >= 0);
|
8013
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
8014
|
-
|
8015
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
|
8016
|
-
return node;
|
8017
|
-
}
|
8018
|
-
|
8019
9004
|
// These are the various precedence rules. Because we are using a Pratt parser,
|
8020
9005
|
// they are named binding power to represent the manner in which nodes are bound
|
8021
9006
|
// together in the stack.
|
@@ -8269,7 +9254,7 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
|
|
8269
9254
|
if (accept1(parser, type)) return;
|
8270
9255
|
|
8271
9256
|
const uint8_t *location = parser->previous.end;
|
8272
|
-
|
9257
|
+
pm_parser_err(parser, location, location, diag_id);
|
8273
9258
|
|
8274
9259
|
parser->previous.start = location;
|
8275
9260
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8282,7 +9267,7 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
|
|
8282
9267
|
if (accept2(parser, type1, type2)) return;
|
8283
9268
|
|
8284
9269
|
const uint8_t *location = parser->previous.end;
|
8285
|
-
|
9270
|
+
pm_parser_err(parser, location, location, diag_id);
|
8286
9271
|
|
8287
9272
|
parser->previous.start = location;
|
8288
9273
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8294,7 +9279,7 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
8294
9279
|
if (accept3(parser, type1, type2, type3)) return;
|
8295
9280
|
|
8296
9281
|
const uint8_t *location = parser->previous.end;
|
8297
|
-
|
9282
|
+
pm_parser_err(parser, location, location, diag_id);
|
8298
9283
|
|
8299
9284
|
parser->previous.start = location;
|
8300
9285
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8389,23 +9374,23 @@ parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power,
|
|
8389
9374
|
}
|
8390
9375
|
|
8391
9376
|
// Convert the name of a method into the corresponding write method name. For
|
8392
|
-
//
|
9377
|
+
// example, foo would be turned into foo=.
|
8393
9378
|
static void
|
8394
|
-
parse_write_name(
|
9379
|
+
parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
8395
9380
|
// The method name needs to change. If we previously had
|
8396
9381
|
// foo, we now need foo=. In this case we'll allocate a new
|
8397
9382
|
// owned string, copy the previous method name in, and
|
8398
9383
|
// append an =.
|
8399
|
-
|
9384
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
|
9385
|
+
size_t length = constant->length;
|
8400
9386
|
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
8401
9387
|
if (name == NULL) return;
|
8402
9388
|
|
8403
|
-
memcpy(name,
|
9389
|
+
memcpy(name, constant->start, length);
|
8404
9390
|
name[length] = '=';
|
8405
9391
|
|
8406
9392
|
// Now switch the name to the new string.
|
8407
|
-
|
8408
|
-
pm_string_owned_init(string, name, length + 1);
|
9393
|
+
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
8409
9394
|
}
|
8410
9395
|
|
8411
9396
|
// Convert the given node into a valid target node.
|
@@ -8428,7 +9413,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8428
9413
|
return target;
|
8429
9414
|
case PM_BACK_REFERENCE_READ_NODE:
|
8430
9415
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
8431
|
-
|
9416
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
8432
9417
|
return target;
|
8433
9418
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
8434
9419
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -8436,7 +9421,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8436
9421
|
return target;
|
8437
9422
|
case PM_LOCAL_VARIABLE_READ_NODE:
|
8438
9423
|
if (token_is_numbered_parameter(target->location.start, target->location.end)) {
|
8439
|
-
|
9424
|
+
pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8440
9425
|
} else {
|
8441
9426
|
assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
|
8442
9427
|
target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
|
@@ -8489,21 +9474,23 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8489
9474
|
pm_parser_local_add_location(parser, message.start, message.end);
|
8490
9475
|
pm_node_destroy(parser, target);
|
8491
9476
|
|
9477
|
+
uint32_t depth = 0;
|
9478
|
+
for (pm_scope_t *scope = parser->current_scope; scope && scope->transparent; depth++, scope = scope->previous);
|
8492
9479
|
const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
|
8493
|
-
target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name,
|
9480
|
+
target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
|
8494
9481
|
|
8495
9482
|
assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
|
8496
9483
|
target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
|
8497
9484
|
|
8498
9485
|
if (token_is_numbered_parameter(message.start, message.end)) {
|
8499
|
-
|
9486
|
+
pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8500
9487
|
}
|
8501
9488
|
|
8502
9489
|
return target;
|
8503
9490
|
}
|
8504
9491
|
|
8505
9492
|
if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
8506
|
-
parse_write_name(&call->name);
|
9493
|
+
parse_write_name(parser, &call->name);
|
8507
9494
|
return (pm_node_t *) call;
|
8508
9495
|
}
|
8509
9496
|
}
|
@@ -8518,9 +9505,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8518
9505
|
(call->message_loc.end[-1] == ']') &&
|
8519
9506
|
(call->block == NULL)
|
8520
9507
|
) {
|
8521
|
-
//
|
8522
|
-
|
8523
|
-
pm_string_constant_init(&call->name, "[]=", 3);
|
9508
|
+
// Replace the name with "[]=".
|
9509
|
+
call->name = pm_parser_constant_id_static(parser, "[]=", 3);
|
8524
9510
|
return target;
|
8525
9511
|
}
|
8526
9512
|
}
|
@@ -8529,7 +9515,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8529
9515
|
// In this case we have a node that we don't know how to convert
|
8530
9516
|
// into a target. We need to treat it as an error. For now, we'll
|
8531
9517
|
// mark it as an error and just skip right past it.
|
8532
|
-
|
9518
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8533
9519
|
return target;
|
8534
9520
|
}
|
8535
9521
|
}
|
@@ -8542,7 +9528,7 @@ parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
|
8542
9528
|
|
8543
9529
|
// Ensure that we have either an = or a ) after the targets.
|
8544
9530
|
if (!match3(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_KEYWORD_IN)) {
|
8545
|
-
|
9531
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8546
9532
|
}
|
8547
9533
|
|
8548
9534
|
return result;
|
@@ -8568,7 +9554,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8568
9554
|
}
|
8569
9555
|
case PM_BACK_REFERENCE_READ_NODE:
|
8570
9556
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
8571
|
-
|
9557
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
8572
9558
|
/* fallthrough */
|
8573
9559
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
8574
9560
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -8577,7 +9563,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8577
9563
|
}
|
8578
9564
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
8579
9565
|
if (token_is_numbered_parameter(target->location.start, target->location.end)) {
|
8580
|
-
|
9566
|
+
pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8581
9567
|
}
|
8582
9568
|
|
8583
9569
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
@@ -8642,7 +9628,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8642
9628
|
target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
|
8643
9629
|
|
8644
9630
|
if (token_is_numbered_parameter(message.start, message.end)) {
|
8645
|
-
|
9631
|
+
pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8646
9632
|
}
|
8647
9633
|
|
8648
9634
|
return target;
|
@@ -8665,7 +9651,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8665
9651
|
pm_arguments_node_arguments_append(arguments, value);
|
8666
9652
|
call->base.location.end = arguments->base.location.end;
|
8667
9653
|
|
8668
|
-
parse_write_name(&call->name);
|
9654
|
+
parse_write_name(parser, &call->name);
|
8669
9655
|
return (pm_node_t *) call;
|
8670
9656
|
}
|
8671
9657
|
}
|
@@ -8686,9 +9672,8 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8686
9672
|
pm_arguments_node_arguments_append(call->arguments, value);
|
8687
9673
|
target->location.end = value->location.end;
|
8688
9674
|
|
8689
|
-
//
|
8690
|
-
|
8691
|
-
pm_string_constant_init(&call->name, "[]=", 3);
|
9675
|
+
// Replace the name with "[]=".
|
9676
|
+
call->name = pm_parser_constant_id_static(parser, "[]=", 3);
|
8692
9677
|
return target;
|
8693
9678
|
}
|
8694
9679
|
|
@@ -8704,7 +9689,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8704
9689
|
// In this case we have a node that we don't know how to convert into a
|
8705
9690
|
// target. We need to treat it as an error. For now, we'll mark it as an
|
8706
9691
|
// error and just skip right past it.
|
8707
|
-
|
9692
|
+
pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8708
9693
|
return target;
|
8709
9694
|
}
|
8710
9695
|
}
|
@@ -8730,7 +9715,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
8730
9715
|
// anonymous. It can be the final target or be in the middle if
|
8731
9716
|
// there haven't been any others yet.
|
8732
9717
|
if (has_splat) {
|
8733
|
-
|
9718
|
+
pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
8734
9719
|
}
|
8735
9720
|
|
8736
9721
|
pm_token_t star_operator = parser->previous;
|
@@ -8770,7 +9755,7 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
8770
9755
|
|
8771
9756
|
// Ensure that we have either an = or a ) after the targets.
|
8772
9757
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
8773
|
-
|
9758
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8774
9759
|
}
|
8775
9760
|
|
8776
9761
|
return result;
|
@@ -8863,7 +9848,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
8863
9848
|
if (token_begins_expression_p(parser->current.type)) {
|
8864
9849
|
value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
8865
9850
|
} else if (pm_parser_local_depth(parser, &operator) == -1) {
|
8866
|
-
|
9851
|
+
pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
8867
9852
|
}
|
8868
9853
|
|
8869
9854
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -8970,7 +9955,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
8970
9955
|
|
8971
9956
|
while (!match1(parser, PM_TOKEN_EOF)) {
|
8972
9957
|
if (parsed_block_argument) {
|
8973
|
-
|
9958
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
|
8974
9959
|
}
|
8975
9960
|
|
8976
9961
|
pm_node_t *argument = NULL;
|
@@ -8979,7 +9964,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
8979
9964
|
case PM_TOKEN_USTAR_STAR:
|
8980
9965
|
case PM_TOKEN_LABEL: {
|
8981
9966
|
if (parsed_bare_hash) {
|
8982
|
-
|
9967
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
8983
9968
|
}
|
8984
9969
|
|
8985
9970
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -9001,7 +9986,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9001
9986
|
if (token_begins_expression_p(parser->current.type)) {
|
9002
9987
|
expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
|
9003
9988
|
} else if (pm_parser_local_depth(parser, &operator) == -1) {
|
9004
|
-
|
9989
|
+
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
9005
9990
|
}
|
9006
9991
|
|
9007
9992
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -9020,7 +10005,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9020
10005
|
|
9021
10006
|
if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA)) {
|
9022
10007
|
if (pm_parser_local_depth(parser, &parser->previous) == -1) {
|
9023
|
-
|
10008
|
+
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
9024
10009
|
}
|
9025
10010
|
|
9026
10011
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
@@ -9028,7 +10013,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9028
10013
|
pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
9029
10014
|
|
9030
10015
|
if (parsed_bare_hash) {
|
9031
|
-
|
10016
|
+
pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
|
9032
10017
|
}
|
9033
10018
|
|
9034
10019
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
|
@@ -9049,7 +10034,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9049
10034
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
9050
10035
|
} else {
|
9051
10036
|
if (pm_parser_local_depth(parser, &parser->previous) == -1) {
|
9052
|
-
|
10037
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
9053
10038
|
}
|
9054
10039
|
|
9055
10040
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
@@ -9066,7 +10051,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9066
10051
|
|
9067
10052
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
9068
10053
|
if (parsed_bare_hash) {
|
9069
|
-
|
10054
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
9070
10055
|
}
|
9071
10056
|
|
9072
10057
|
pm_token_t operator;
|
@@ -9145,7 +10130,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9145
10130
|
|
9146
10131
|
if (node->parameters.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
9147
10132
|
if (parsed_splat) {
|
9148
|
-
|
10133
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
|
9149
10134
|
}
|
9150
10135
|
|
9151
10136
|
param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
|
@@ -9157,7 +10142,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9157
10142
|
param = (pm_node_t *) parse_required_destructured_parameter(parser);
|
9158
10143
|
} else if (accept1(parser, PM_TOKEN_USTAR)) {
|
9159
10144
|
if (parsed_splat) {
|
9160
|
-
|
10145
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
|
9161
10146
|
}
|
9162
10147
|
|
9163
10148
|
pm_token_t star = parser->previous;
|
@@ -9166,6 +10151,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9166
10151
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
9167
10152
|
pm_token_t name = parser->previous;
|
9168
10153
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
10154
|
+
pm_parser_parameter_name_check(parser, &name);
|
9169
10155
|
pm_parser_local_add_token(parser, &name);
|
9170
10156
|
}
|
9171
10157
|
|
@@ -9176,6 +10162,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9176
10162
|
pm_token_t name = parser->previous;
|
9177
10163
|
|
9178
10164
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
10165
|
+
pm_parser_parameter_name_check(parser, &name);
|
9179
10166
|
pm_parser_local_add_token(parser, &name);
|
9180
10167
|
}
|
9181
10168
|
|
@@ -9237,12 +10224,12 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
|
|
9237
10224
|
}
|
9238
10225
|
|
9239
10226
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
9240
|
-
|
10227
|
+
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
9241
10228
|
}
|
9242
10229
|
|
9243
10230
|
if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
9244
10231
|
// We know what transition we failed on, so we can provide a better error here.
|
9245
|
-
|
10232
|
+
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
9246
10233
|
} else if (state < *current) {
|
9247
10234
|
*current = state;
|
9248
10235
|
}
|
@@ -9297,7 +10284,7 @@ parse_parameters(
|
|
9297
10284
|
if (params->block == NULL) {
|
9298
10285
|
pm_parameters_node_block_set(params, param);
|
9299
10286
|
} else {
|
9300
|
-
|
10287
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
|
9301
10288
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9302
10289
|
}
|
9303
10290
|
|
@@ -9305,7 +10292,7 @@ parse_parameters(
|
|
9305
10292
|
}
|
9306
10293
|
case PM_TOKEN_UDOT_DOT_DOT: {
|
9307
10294
|
if (!allows_forwarding_parameter) {
|
9308
|
-
|
10295
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
9309
10296
|
}
|
9310
10297
|
if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
|
9311
10298
|
update_parameter_state(parser, &parser->current, &order);
|
@@ -9318,7 +10305,7 @@ parse_parameters(
|
|
9318
10305
|
// forwarding parameter and move the keyword rest parameter to the posts list.
|
9319
10306
|
pm_node_t *keyword_rest = params->keyword_rest;
|
9320
10307
|
pm_parameters_node_posts_append(params, keyword_rest);
|
9321
|
-
|
10308
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
9322
10309
|
params->keyword_rest = NULL;
|
9323
10310
|
}
|
9324
10311
|
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
@@ -9337,19 +10324,19 @@ parse_parameters(
|
|
9337
10324
|
parser_lex(parser);
|
9338
10325
|
switch (parser->previous.type) {
|
9339
10326
|
case PM_TOKEN_CONSTANT:
|
9340
|
-
|
10327
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
9341
10328
|
break;
|
9342
10329
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
9343
|
-
|
10330
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
|
9344
10331
|
break;
|
9345
10332
|
case PM_TOKEN_GLOBAL_VARIABLE:
|
9346
|
-
|
10333
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
|
9347
10334
|
break;
|
9348
10335
|
case PM_TOKEN_CLASS_VARIABLE:
|
9349
|
-
|
10336
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
|
9350
10337
|
break;
|
9351
10338
|
case PM_TOKEN_METHOD_NAME:
|
9352
|
-
|
10339
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
|
9353
10340
|
break;
|
9354
10341
|
default: break;
|
9355
10342
|
}
|
@@ -9466,7 +10453,7 @@ parse_parameters(
|
|
9466
10453
|
if (params->rest == NULL) {
|
9467
10454
|
pm_parameters_node_rest_set(params, param);
|
9468
10455
|
} else {
|
9469
|
-
|
10456
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
|
9470
10457
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9471
10458
|
}
|
9472
10459
|
|
@@ -9500,7 +10487,7 @@ parse_parameters(
|
|
9500
10487
|
if (params->keyword_rest == NULL) {
|
9501
10488
|
pm_parameters_node_keyword_rest_set(params, param);
|
9502
10489
|
} else {
|
9503
|
-
|
10490
|
+
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
|
9504
10491
|
pm_parameters_node_posts_append(params, param);
|
9505
10492
|
}
|
9506
10493
|
|
@@ -9518,11 +10505,11 @@ parse_parameters(
|
|
9518
10505
|
if (params->rest == NULL) {
|
9519
10506
|
pm_parameters_node_rest_set(params, param);
|
9520
10507
|
} else {
|
9521
|
-
|
10508
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
|
9522
10509
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9523
10510
|
}
|
9524
10511
|
} else {
|
9525
|
-
|
10512
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
|
9526
10513
|
}
|
9527
10514
|
}
|
9528
10515
|
|
@@ -9725,9 +10712,10 @@ parse_block_parameters(
|
|
9725
10712
|
}
|
9726
10713
|
|
9727
10714
|
pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
|
9728
|
-
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
10715
|
+
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
9729
10716
|
do {
|
9730
10717
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
10718
|
+
pm_parser_parameter_name_check(parser, &parser->previous);
|
9731
10719
|
pm_parser_local_add_token(parser, &parser->previous);
|
9732
10720
|
|
9733
10721
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
@@ -9850,7 +10838,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
9850
10838
|
if (arguments->block == NULL) {
|
9851
10839
|
arguments->block = (pm_node_t *) block;
|
9852
10840
|
} else {
|
9853
|
-
|
10841
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
9854
10842
|
if (arguments->arguments == NULL) {
|
9855
10843
|
arguments->arguments = pm_arguments_node_create(parser);
|
9856
10844
|
}
|
@@ -9873,7 +10861,7 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
|
|
9873
10861
|
bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
9874
10862
|
predicate_closed |= accept1(parser, PM_TOKEN_KEYWORD_THEN);
|
9875
10863
|
if (!predicate_closed) {
|
9876
|
-
|
10864
|
+
pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
|
9877
10865
|
}
|
9878
10866
|
|
9879
10867
|
context_pop(parser);
|
@@ -10057,25 +11045,12 @@ parse_string_part(pm_parser_t *parser) {
|
|
10057
11045
|
// "aaa #{bbb} #@ccc ddd"
|
10058
11046
|
// ^^^^ ^ ^^^^
|
10059
11047
|
case PM_TOKEN_STRING_CONTENT: {
|
10060
|
-
pm_unescape_type_t unescape_type = PM_UNESCAPE_ALL;
|
10061
|
-
|
10062
|
-
if (parser->lex_modes.current->mode == PM_LEX_HEREDOC) {
|
10063
|
-
if (parser->lex_modes.current->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
10064
|
-
// If we're in a tilde heredoc, we want to unescape it later
|
10065
|
-
// because we don't want unescaped newlines to disappear
|
10066
|
-
// before we handle them in the dedent.
|
10067
|
-
unescape_type = PM_UNESCAPE_NONE;
|
10068
|
-
} else if (parser->lex_modes.current->as.heredoc.quote == PM_HEREDOC_QUOTE_SINGLE) {
|
10069
|
-
unescape_type = PM_UNESCAPE_MINIMAL;
|
10070
|
-
}
|
10071
|
-
}
|
10072
|
-
|
10073
|
-
parser_lex(parser);
|
10074
|
-
|
10075
11048
|
pm_token_t opening = not_provided(parser);
|
10076
11049
|
pm_token_t closing = not_provided(parser);
|
11050
|
+
pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
10077
11051
|
|
10078
|
-
|
11052
|
+
parser_lex(parser);
|
11053
|
+
return node;
|
10079
11054
|
}
|
10080
11055
|
// Here the lexer has returned the beginning of an embedded expression. In
|
10081
11056
|
// that case we'll parse the inner statements and return that as the part.
|
@@ -10166,7 +11141,7 @@ parse_string_part(pm_parser_t *parser) {
|
|
10166
11141
|
}
|
10167
11142
|
default:
|
10168
11143
|
parser_lex(parser);
|
10169
|
-
|
11144
|
+
pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
|
10170
11145
|
return NULL;
|
10171
11146
|
}
|
10172
11147
|
}
|
@@ -10177,7 +11152,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10177
11152
|
|
10178
11153
|
if (lex_mode->mode != PM_LEX_STRING) {
|
10179
11154
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
10180
|
-
pm_token_t symbol;
|
10181
11155
|
|
10182
11156
|
switch (parser->current.type) {
|
10183
11157
|
case PM_TOKEN_IDENTIFIER:
|
@@ -10190,21 +11164,21 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10190
11164
|
case PM_TOKEN_BACK_REFERENCE:
|
10191
11165
|
case PM_CASE_KEYWORD:
|
10192
11166
|
parser_lex(parser);
|
10193
|
-
symbol = parser->previous;
|
10194
11167
|
break;
|
10195
11168
|
case PM_CASE_OPERATOR:
|
10196
11169
|
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
10197
11170
|
parser_lex(parser);
|
10198
|
-
symbol = parser->previous;
|
10199
11171
|
break;
|
10200
11172
|
default:
|
10201
11173
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
10202
|
-
symbol = parser->previous;
|
10203
11174
|
break;
|
10204
11175
|
}
|
10205
11176
|
|
10206
11177
|
pm_token_t closing = not_provided(parser);
|
10207
|
-
|
11178
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
11179
|
+
|
11180
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11181
|
+
return (pm_node_t *) symbol;
|
10208
11182
|
}
|
10209
11183
|
|
10210
11184
|
if (lex_mode->as.string.interpolation) {
|
@@ -10215,7 +11189,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10215
11189
|
|
10216
11190
|
pm_token_t content = not_provided(parser);
|
10217
11191
|
pm_token_t closing = parser->previous;
|
10218
|
-
return (pm_node_t *)
|
11192
|
+
return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
|
10219
11193
|
}
|
10220
11194
|
|
10221
11195
|
// Now we can parse the first part of the symbol.
|
@@ -10248,18 +11222,23 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10248
11222
|
}
|
10249
11223
|
|
10250
11224
|
pm_token_t content;
|
10251
|
-
|
10252
|
-
|
11225
|
+
pm_string_t unescaped;
|
11226
|
+
|
11227
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
11228
|
+
content = parser->current;
|
11229
|
+
unescaped = parser->current_string;
|
11230
|
+
parser_lex(parser);
|
10253
11231
|
} else {
|
10254
11232
|
content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
|
11233
|
+
pm_string_shared_init(&unescaped, content.start, content.end);
|
10255
11234
|
}
|
10256
11235
|
|
10257
11236
|
if (next_state != PM_LEX_STATE_NONE) {
|
10258
11237
|
lex_state_set(parser, next_state);
|
10259
11238
|
}
|
10260
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
10261
11239
|
|
10262
|
-
|
11240
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
11241
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
10263
11242
|
}
|
10264
11243
|
|
10265
11244
|
// Parse an argument to undef which can either be a bare word, a
|
@@ -10276,8 +11255,10 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
10276
11255
|
|
10277
11256
|
pm_token_t opening = not_provided(parser);
|
10278
11257
|
pm_token_t closing = not_provided(parser);
|
11258
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
10279
11259
|
|
10280
|
-
|
11260
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11261
|
+
return (pm_node_t *) symbol;
|
10281
11262
|
}
|
10282
11263
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
10283
11264
|
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
|
@@ -10286,7 +11267,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
10286
11267
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE);
|
10287
11268
|
}
|
10288
11269
|
default:
|
10289
|
-
|
11270
|
+
pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
|
10290
11271
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
10291
11272
|
}
|
10292
11273
|
}
|
@@ -10310,8 +11291,10 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
10310
11291
|
parser_lex(parser);
|
10311
11292
|
pm_token_t opening = not_provided(parser);
|
10312
11293
|
pm_token_t closing = not_provided(parser);
|
11294
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
10313
11295
|
|
10314
|
-
|
11296
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11297
|
+
return (pm_node_t *) symbol;
|
10315
11298
|
}
|
10316
11299
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
10317
11300
|
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
|
@@ -10329,7 +11312,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
10329
11312
|
parser_lex(parser);
|
10330
11313
|
return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
|
10331
11314
|
default:
|
10332
|
-
|
11315
|
+
pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
|
10333
11316
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
10334
11317
|
}
|
10335
11318
|
}
|
@@ -10366,9 +11349,9 @@ parse_variable_call(pm_parser_t *parser) {
|
|
10366
11349
|
// local variable read. If it's not, then we'll create a normal call
|
10367
11350
|
// node but add an error.
|
10368
11351
|
if (parser->current_scope->explicit_params) {
|
10369
|
-
|
11352
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
10370
11353
|
} else if (outer_scope_using_numbered_params_p(parser)) {
|
10371
|
-
|
11354
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
10372
11355
|
} else {
|
10373
11356
|
// When you use a numbered parameter, it implies the existence
|
10374
11357
|
// of all of the locals that exist before it. For example,
|
@@ -10421,76 +11404,8 @@ parse_method_definition_name(pm_parser_t *parser) {
|
|
10421
11404
|
}
|
10422
11405
|
}
|
10423
11406
|
|
10424
|
-
static
|
10425
|
-
|
10426
|
-
{
|
10427
|
-
const pm_location_t *content_loc = &((pm_string_node_t *) node)->content_loc;
|
10428
|
-
int cur_whitespace;
|
10429
|
-
const uint8_t *cur_char = content_loc->start;
|
10430
|
-
|
10431
|
-
while (cur_char && cur_char < content_loc->end) {
|
10432
|
-
// Any empty newlines aren't included in the minimum whitespace
|
10433
|
-
// calculation.
|
10434
|
-
size_t eol_length;
|
10435
|
-
while ((eol_length = match_eol_at(parser, cur_char))) {
|
10436
|
-
cur_char += eol_length;
|
10437
|
-
}
|
10438
|
-
|
10439
|
-
if (cur_char == content_loc->end) break;
|
10440
|
-
|
10441
|
-
cur_whitespace = 0;
|
10442
|
-
|
10443
|
-
while (pm_char_is_inline_whitespace(*cur_char) && cur_char < content_loc->end) {
|
10444
|
-
if (cur_char[0] == '\t') {
|
10445
|
-
cur_whitespace = (cur_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
10446
|
-
} else {
|
10447
|
-
cur_whitespace++;
|
10448
|
-
}
|
10449
|
-
cur_char++;
|
10450
|
-
}
|
10451
|
-
|
10452
|
-
// If we hit a newline, then we have encountered a line that
|
10453
|
-
// contains only whitespace, and it shouldn't be considered in
|
10454
|
-
// the calculation of common leading whitespace.
|
10455
|
-
eol_length = match_eol_at(parser, cur_char);
|
10456
|
-
if (eol_length) {
|
10457
|
-
cur_char += eol_length;
|
10458
|
-
continue;
|
10459
|
-
}
|
10460
|
-
|
10461
|
-
if (cur_whitespace < common_whitespace || common_whitespace == -1) {
|
10462
|
-
common_whitespace = cur_whitespace;
|
10463
|
-
}
|
10464
|
-
|
10465
|
-
cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
|
10466
|
-
if (cur_char) cur_char++;
|
10467
|
-
}
|
10468
|
-
return common_whitespace;
|
10469
|
-
}
|
10470
|
-
|
10471
|
-
// Calculate the common leading whitespace for each line in a heredoc.
|
10472
|
-
static int
|
10473
|
-
parse_heredoc_common_whitespace(pm_parser_t *parser, pm_node_list_t *nodes) {
|
10474
|
-
int common_whitespace = -1;
|
10475
|
-
|
10476
|
-
for (size_t index = 0; index < nodes->size; index++) {
|
10477
|
-
pm_node_t *node = nodes->nodes[index];
|
10478
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) continue;
|
10479
|
-
|
10480
|
-
// If the previous node wasn't a string node, we don't want to trim
|
10481
|
-
// whitespace. This could happen after an interpolated expression or
|
10482
|
-
// variable.
|
10483
|
-
if (index == 0 || PM_NODE_TYPE_P(nodes->nodes[index - 1], PM_STRING_NODE)) {
|
10484
|
-
common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, common_whitespace);
|
10485
|
-
}
|
10486
|
-
}
|
10487
|
-
|
10488
|
-
return common_whitespace;
|
10489
|
-
}
|
10490
|
-
|
10491
|
-
static pm_string_t *
|
10492
|
-
parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool dedent_node, int common_whitespace, pm_heredoc_quote_t quote)
|
10493
|
-
{
|
11407
|
+
static void
|
11408
|
+
parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
|
10494
11409
|
// Get a reference to the string struct that is being held by the string
|
10495
11410
|
// node. This is the value we're going to actually manipulate.
|
10496
11411
|
pm_string_ensure_owned(string);
|
@@ -10499,85 +11414,37 @@ parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool
|
|
10499
11414
|
// destination to move bytes into. We'll also use it for bounds checking
|
10500
11415
|
// since we don't require that these strings be null terminated.
|
10501
11416
|
size_t dest_length = pm_string_length(string);
|
10502
|
-
uint8_t *
|
10503
|
-
|
10504
|
-
const uint8_t *source_cursor = source_start;
|
11417
|
+
const uint8_t *source_cursor = (uint8_t *) string->source;
|
10505
11418
|
const uint8_t *source_end = source_cursor + dest_length;
|
10506
11419
|
|
10507
11420
|
// We're going to move bytes backward in the string when we get leading
|
10508
11421
|
// whitespace, so we'll maintain a pointer to the current position in the
|
10509
11422
|
// string that we're writing to.
|
10510
|
-
|
10511
|
-
|
10512
|
-
|
10513
|
-
|
10514
|
-
|
10515
|
-
|
10516
|
-
|
10517
|
-
|
10518
|
-
|
10519
|
-
|
10520
|
-
|
10521
|
-
|
10522
|
-
while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
|
10523
|
-
if (*source_cursor == '\t') {
|
10524
|
-
trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
10525
|
-
if (trimmed_whitespace > common_whitespace) break;
|
10526
|
-
} else {
|
10527
|
-
trimmed_whitespace++;
|
10528
|
-
}
|
10529
|
-
|
10530
|
-
source_cursor++;
|
10531
|
-
dest_length--;
|
10532
|
-
}
|
10533
|
-
}
|
10534
|
-
|
10535
|
-
// At this point we have dedented all that we need to, so we need to find
|
10536
|
-
// the next newline.
|
10537
|
-
const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
|
10538
|
-
|
10539
|
-
if (breakpoint == NULL) {
|
10540
|
-
// If there isn't another newline, then we can just move the rest of the
|
10541
|
-
// string and break from the loop.
|
10542
|
-
memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
|
10543
|
-
break;
|
11423
|
+
size_t trimmed_whitespace = 0;
|
11424
|
+
|
11425
|
+
// While we haven't reached the amount of common whitespace that we need to
|
11426
|
+
// trim and we haven't reached the end of the string, we'll keep trimming
|
11427
|
+
// whitespace. Trimming in this context means skipping over these bytes such
|
11428
|
+
// that they aren't copied into the new string.
|
11429
|
+
while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
|
11430
|
+
if (*source_cursor == '\t') {
|
11431
|
+
trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
11432
|
+
if (trimmed_whitespace > common_whitespace) break;
|
11433
|
+
} else {
|
11434
|
+
trimmed_whitespace++;
|
10544
11435
|
}
|
10545
11436
|
|
10546
|
-
|
10547
|
-
|
10548
|
-
if (breakpoint < source_end) breakpoint++;
|
10549
|
-
memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
|
10550
|
-
dest_cursor += (breakpoint - source_cursor);
|
10551
|
-
source_cursor = breakpoint;
|
10552
|
-
dedent_node = true;
|
11437
|
+
source_cursor++;
|
11438
|
+
dest_length--;
|
10553
11439
|
}
|
10554
11440
|
|
10555
|
-
|
11441
|
+
memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
|
10556
11442
|
string->length = dest_length;
|
10557
|
-
|
10558
|
-
if (dest_length != 0) {
|
10559
|
-
pm_unescape_manipulate_string(parser, string, (quote == PM_HEREDOC_QUOTE_SINGLE) ? PM_UNESCAPE_MINIMAL : PM_UNESCAPE_ALL);
|
10560
|
-
}
|
10561
|
-
return string;
|
10562
11443
|
}
|
10563
11444
|
|
10564
11445
|
// Take a heredoc node that is indented by a ~ and trim the leading whitespace.
|
10565
11446
|
static void
|
10566
|
-
parse_heredoc_dedent(pm_parser_t *parser,
|
10567
|
-
{
|
10568
|
-
pm_node_list_t *nodes;
|
10569
|
-
|
10570
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
10571
|
-
nodes = &((pm_interpolated_x_string_node_t *) heredoc_node)->parts;
|
10572
|
-
} else {
|
10573
|
-
nodes = &((pm_interpolated_string_node_t *) heredoc_node)->parts;
|
10574
|
-
}
|
10575
|
-
|
10576
|
-
// First, calculate how much common whitespace we need to trim. If there is
|
10577
|
-
// none or it's 0, then we can return early.
|
10578
|
-
int common_whitespace;
|
10579
|
-
if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;
|
10580
|
-
|
11447
|
+
parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
|
10581
11448
|
// The next node should be dedented if it's the first node in the list or if
|
10582
11449
|
// if follows a string node.
|
10583
11450
|
bool dedent_next = true;
|
@@ -10600,7 +11467,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_t *heredoc_node, pm_heredoc_qu
|
|
10600
11467
|
}
|
10601
11468
|
|
10602
11469
|
pm_string_node_t *string_node = ((pm_string_node_t *) node);
|
10603
|
-
|
11470
|
+
if (dedent_next) {
|
11471
|
+
parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
|
11472
|
+
}
|
11473
|
+
|
10604
11474
|
if (string_node->unescaped.length == 0) {
|
10605
11475
|
pm_node_destroy(parser, node);
|
10606
11476
|
} else {
|
@@ -10929,13 +11799,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
10929
11799
|
case PM_TOKEN_STRING_BEGIN:
|
10930
11800
|
key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
|
10931
11801
|
if (!pm_symbol_node_label_p(key)) {
|
10932
|
-
|
11802
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
10933
11803
|
}
|
10934
11804
|
|
10935
11805
|
break;
|
10936
11806
|
default:
|
10937
11807
|
parser_lex(parser);
|
10938
|
-
|
11808
|
+
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
10939
11809
|
key = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
10940
11810
|
break;
|
10941
11811
|
}
|
@@ -10970,7 +11840,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
10970
11840
|
return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
10971
11841
|
}
|
10972
11842
|
default: {
|
10973
|
-
|
11843
|
+
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
|
10974
11844
|
pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
|
10975
11845
|
return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
10976
11846
|
}
|
@@ -11058,7 +11928,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11058
11928
|
default: {
|
11059
11929
|
// If we get here, then we have a pin operator followed by something
|
11060
11930
|
// not understood. We'll create a missing node and return that.
|
11061
|
-
|
11931
|
+
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
|
11062
11932
|
pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
|
11063
11933
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
11064
11934
|
}
|
@@ -11082,7 +11952,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11082
11952
|
return parse_pattern_constant_path(parser, node);
|
11083
11953
|
}
|
11084
11954
|
default:
|
11085
|
-
|
11955
|
+
pm_parser_err_current(parser, diag_id);
|
11086
11956
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
11087
11957
|
}
|
11088
11958
|
}
|
@@ -11126,7 +11996,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11126
11996
|
break;
|
11127
11997
|
}
|
11128
11998
|
default: {
|
11129
|
-
|
11999
|
+
pm_parser_err_current(parser, diag_id);
|
11130
12000
|
pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
11131
12001
|
|
11132
12002
|
if (node == NULL) {
|
@@ -11218,7 +12088,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
11218
12088
|
// will continue to parse the rest of the patterns, but we will indicate
|
11219
12089
|
// it as an error.
|
11220
12090
|
if (trailing_rest) {
|
11221
|
-
|
12091
|
+
pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
|
11222
12092
|
}
|
11223
12093
|
|
11224
12094
|
trailing_rest = true;
|
@@ -11284,6 +12154,7 @@ static inline pm_node_t *
|
|
11284
12154
|
parse_strings(pm_parser_t *parser) {
|
11285
12155
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
11286
12156
|
pm_node_t *result = NULL;
|
12157
|
+
bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
11287
12158
|
|
11288
12159
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
11289
12160
|
pm_node_t *node = NULL;
|
@@ -11301,17 +12172,30 @@ parse_strings(pm_parser_t *parser) {
|
|
11301
12172
|
// start. In that case we'll create an empty content token and
|
11302
12173
|
// return an uninterpolated string.
|
11303
12174
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11304
|
-
|
12175
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
12176
|
+
|
12177
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
12178
|
+
node = (pm_node_t *) string;
|
11305
12179
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11306
12180
|
// If we get here, then we have an end of a label immediately
|
11307
12181
|
// after a start. In that case we'll create an empty symbol
|
11308
12182
|
// node.
|
11309
12183
|
pm_token_t opening = not_provided(parser);
|
11310
12184
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11311
|
-
|
12185
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
12186
|
+
|
12187
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
12188
|
+
node = (pm_node_t *) symbol;
|
11312
12189
|
} else if (!lex_interpolation) {
|
11313
12190
|
// If we don't accept interpolation then we expect the string to
|
11314
12191
|
// start with a single string content node.
|
12192
|
+
pm_string_t unescaped;
|
12193
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
12194
|
+
unescaped = PM_EMPTY_STRING;
|
12195
|
+
} else {
|
12196
|
+
unescaped = parser->current_string;
|
12197
|
+
}
|
12198
|
+
|
11315
12199
|
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
11316
12200
|
pm_token_t content = parser->previous;
|
11317
12201
|
|
@@ -11330,21 +12214,22 @@ parse_strings(pm_parser_t *parser) {
|
|
11330
12214
|
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
|
11331
12215
|
|
11332
12216
|
pm_token_t delimiters = not_provided(parser);
|
11333
|
-
pm_node_t *part = (pm_node_t *)
|
12217
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
11334
12218
|
pm_node_list_append(&parts, part);
|
11335
12219
|
|
11336
|
-
|
11337
|
-
part = (pm_node_t *)
|
12220
|
+
do {
|
12221
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
11338
12222
|
pm_node_list_append(&parts, part);
|
11339
|
-
|
12223
|
+
parser_lex(parser);
|
12224
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
11340
12225
|
|
11341
12226
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
11342
12227
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
11343
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11344
|
-
node = (pm_node_t *)
|
12228
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
12229
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11345
12230
|
} else {
|
11346
12231
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
11347
|
-
node = (pm_node_t *)
|
12232
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11348
12233
|
}
|
11349
12234
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
11350
12235
|
// In this case we've hit string content so we know the string
|
@@ -11352,12 +12237,14 @@ parse_strings(pm_parser_t *parser) {
|
|
11352
12237
|
// following token is the end (in which case we can return a
|
11353
12238
|
// plain string) or if it's not then it has interpolation.
|
11354
12239
|
pm_token_t content = parser->current;
|
12240
|
+
pm_string_t unescaped = parser->current_string;
|
11355
12241
|
parser_lex(parser);
|
11356
12242
|
|
11357
|
-
if (
|
11358
|
-
node = (pm_node_t *)
|
12243
|
+
if (match1(parser, PM_TOKEN_STRING_END)) {
|
12244
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
12245
|
+
parser_lex(parser);
|
11359
12246
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11360
|
-
node = (pm_node_t *)
|
12247
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11361
12248
|
} else {
|
11362
12249
|
// If we get here, then we have interpolation so we'll need
|
11363
12250
|
// to create a string or symbol node with interpolation.
|
@@ -11365,7 +12252,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11365
12252
|
pm_token_t string_opening = not_provided(parser);
|
11366
12253
|
pm_token_t string_closing = not_provided(parser);
|
11367
12254
|
|
11368
|
-
pm_node_t *part = (pm_node_t *)
|
12255
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
11369
12256
|
pm_node_list_append(&parts, part);
|
11370
12257
|
|
11371
12258
|
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
@@ -11374,7 +12261,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11374
12261
|
}
|
11375
12262
|
}
|
11376
12263
|
|
11377
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
12264
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
11378
12265
|
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
11379
12266
|
} else {
|
11380
12267
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
@@ -11382,11 +12269,11 @@ parse_strings(pm_parser_t *parser) {
|
|
11382
12269
|
}
|
11383
12270
|
}
|
11384
12271
|
} else {
|
11385
|
-
// If we get here, then the first part of the string is not
|
11386
|
-
//
|
11387
|
-
//
|
12272
|
+
// If we get here, then the first part of the string is not plain
|
12273
|
+
// string content, in which case we need to parse the string as an
|
12274
|
+
// interpolated string.
|
11388
12275
|
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
|
11389
|
-
pm_node_t *part
|
12276
|
+
pm_node_t *part;
|
11390
12277
|
|
11391
12278
|
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
11392
12279
|
if ((part = parse_string_part(parser)) != NULL) {
|
@@ -11418,7 +12305,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11418
12305
|
// If it cannot be concatenated with the previous node, then we'll
|
11419
12306
|
// need to add a syntax error.
|
11420
12307
|
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
11421
|
-
|
12308
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
11422
12309
|
}
|
11423
12310
|
|
11424
12311
|
// Either way we will create a concat node to hold the strings
|
@@ -11464,7 +12351,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11464
12351
|
element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
|
11465
12352
|
} else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
|
11466
12353
|
if (parsed_bare_hash) {
|
11467
|
-
|
12354
|
+
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
|
11468
12355
|
}
|
11469
12356
|
|
11470
12357
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -11480,7 +12367,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11480
12367
|
|
11481
12368
|
if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
11482
12369
|
if (parsed_bare_hash) {
|
11483
|
-
|
12370
|
+
pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
|
11484
12371
|
}
|
11485
12372
|
|
11486
12373
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -11598,7 +12485,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11598
12485
|
// If we didn't find a terminator and we didn't find a right
|
11599
12486
|
// parenthesis, then this is a syntax error.
|
11600
12487
|
if (!terminator_found) {
|
11601
|
-
|
12488
|
+
pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
|
11602
12489
|
}
|
11603
12490
|
|
11604
12491
|
// Parse each statement within the parentheses.
|
@@ -11627,7 +12514,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11627
12514
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
11628
12515
|
break;
|
11629
12516
|
} else {
|
11630
|
-
|
12517
|
+
pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
|
11631
12518
|
}
|
11632
12519
|
}
|
11633
12520
|
|
@@ -11665,7 +12552,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11665
12552
|
content.start = content.start + 1;
|
11666
12553
|
|
11667
12554
|
pm_token_t closing = not_provided(parser);
|
11668
|
-
pm_node_t *node = (pm_node_t *)
|
12555
|
+
pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
|
11669
12556
|
|
11670
12557
|
// Characters can be followed by strings in which case they are
|
11671
12558
|
// automatically concatenated.
|
@@ -11839,9 +12726,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11839
12726
|
case PM_TOKEN_HEREDOC_START: {
|
11840
12727
|
// Here we have found a heredoc. We'll parse it and add it to the
|
11841
12728
|
// list of strings.
|
11842
|
-
|
11843
|
-
|
11844
|
-
|
12729
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
12730
|
+
assert(lex_mode->mode == PM_LEX_HEREDOC);
|
12731
|
+
pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
|
12732
|
+
pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
|
11845
12733
|
|
11846
12734
|
parser_lex(parser);
|
11847
12735
|
pm_token_t opening = parser->previous;
|
@@ -11857,9 +12745,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11857
12745
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11858
12746
|
|
11859
12747
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
11860
|
-
node = (pm_node_t *)
|
12748
|
+
node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
|
11861
12749
|
} else {
|
11862
|
-
node = (pm_node_t *)
|
12750
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
|
11863
12751
|
}
|
11864
12752
|
|
11865
12753
|
node->location.end = opening.end;
|
@@ -11884,15 +12772,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11884
12772
|
cast->base.type = PM_X_STRING_NODE;
|
11885
12773
|
}
|
11886
12774
|
|
11887
|
-
|
11888
|
-
|
12775
|
+
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
12776
|
+
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
12777
|
+
parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
|
12778
|
+
}
|
11889
12779
|
|
11890
12780
|
node = (pm_node_t *) cast;
|
11891
|
-
|
11892
|
-
|
11893
|
-
int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
|
11894
|
-
parse_heredoc_dedent_single_node(parser, &cast->unescaped, true, common_whitespace, quote);
|
11895
|
-
}
|
12781
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
12782
|
+
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
11896
12783
|
} else {
|
11897
12784
|
// If we get here, then we have multiple parts in the heredoc,
|
11898
12785
|
// so we'll need to create an interpolated string node to hold
|
@@ -11931,8 +12818,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11931
12818
|
|
11932
12819
|
// If this is a heredoc that is indented with a ~, then we need
|
11933
12820
|
// to dedent each line by the common leading whitespace.
|
11934
|
-
|
11935
|
-
|
12821
|
+
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
12822
|
+
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
12823
|
+
pm_node_list_t *nodes;
|
12824
|
+
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
12825
|
+
nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
|
12826
|
+
} else {
|
12827
|
+
nodes = &((pm_interpolated_string_node_t *) node)->parts;
|
12828
|
+
}
|
12829
|
+
|
12830
|
+
parse_heredoc_dedent(parser, nodes, common_whitespace);
|
11936
12831
|
}
|
11937
12832
|
}
|
11938
12833
|
|
@@ -11995,10 +12890,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11995
12890
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11996
12891
|
if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
|
11997
12892
|
if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
|
11998
|
-
|
12893
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
11999
12894
|
}
|
12000
12895
|
} else {
|
12001
|
-
|
12896
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
12002
12897
|
}
|
12003
12898
|
|
12004
12899
|
return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
|
@@ -12006,7 +12901,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12006
12901
|
case PM_SYMBOL_NODE:
|
12007
12902
|
case PM_INTERPOLATED_SYMBOL_NODE: {
|
12008
12903
|
if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
|
12009
|
-
|
12904
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
12010
12905
|
}
|
12011
12906
|
}
|
12012
12907
|
/* fallthrough */
|
@@ -12032,7 +12927,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12032
12927
|
}
|
12033
12928
|
|
12034
12929
|
if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
|
12035
|
-
|
12930
|
+
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
|
12036
12931
|
return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
|
12037
12932
|
}
|
12038
12933
|
|
@@ -12142,7 +13037,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12142
13037
|
// If we didn't parse any conditions (in or when) then we need to
|
12143
13038
|
// indicate that we have an error.
|
12144
13039
|
if (case_node->conditions.size == 0) {
|
12145
|
-
|
13040
|
+
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
|
12146
13041
|
}
|
12147
13042
|
|
12148
13043
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
@@ -12185,12 +13080,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12185
13080
|
pm_begin_node_end_keyword_set(begin_node, &parser->previous);
|
12186
13081
|
|
12187
13082
|
if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
|
12188
|
-
|
12189
|
-
&parser->error_list,
|
12190
|
-
begin_node->else_clause->base.location.start,
|
12191
|
-
begin_node->else_clause->base.location.end,
|
12192
|
-
PM_ERR_BEGIN_LONELY_ELSE
|
12193
|
-
);
|
13083
|
+
pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE);
|
12194
13084
|
}
|
12195
13085
|
|
12196
13086
|
return (pm_node_t *) begin_node;
|
@@ -12206,7 +13096,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12206
13096
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
|
12207
13097
|
pm_context_t context = parser->current_context->context;
|
12208
13098
|
if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
|
12209
|
-
|
13099
|
+
pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
|
12210
13100
|
}
|
12211
13101
|
return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
|
12212
13102
|
}
|
@@ -12239,7 +13129,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12239
13129
|
(parser->current_context->context == PM_CONTEXT_CLASS) ||
|
12240
13130
|
(parser->current_context->context == PM_CONTEXT_MODULE)
|
12241
13131
|
) {
|
12242
|
-
|
13132
|
+
pm_parser_err_current(parser, PM_ERR_RETURN_INVALID);
|
12243
13133
|
}
|
12244
13134
|
return (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
|
12245
13135
|
}
|
@@ -12305,7 +13195,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12305
13195
|
pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_CLASS_NAME);
|
12306
13196
|
pm_token_t name = parser->previous;
|
12307
13197
|
if (name.type != PM_TOKEN_CONSTANT) {
|
12308
|
-
|
13198
|
+
pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
|
12309
13199
|
}
|
12310
13200
|
|
12311
13201
|
pm_token_t inheritance_operator;
|
@@ -12346,7 +13236,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12346
13236
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
12347
13237
|
|
12348
13238
|
if (context_def_p(parser)) {
|
12349
|
-
|
13239
|
+
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
|
12350
13240
|
}
|
12351
13241
|
|
12352
13242
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
@@ -12354,7 +13244,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12354
13244
|
pm_do_loop_stack_pop(parser);
|
12355
13245
|
|
12356
13246
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
12357
|
-
|
13247
|
+
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
12358
13248
|
}
|
12359
13249
|
|
12360
13250
|
return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
|
@@ -12486,7 +13376,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12486
13376
|
// If, after all that, we were unable to find a method name, add an
|
12487
13377
|
// error to the error list.
|
12488
13378
|
if (name.type == PM_TOKEN_MISSING) {
|
12489
|
-
|
13379
|
+
pm_parser_err_previous(parser, PM_ERR_DEF_NAME);
|
12490
13380
|
}
|
12491
13381
|
|
12492
13382
|
pm_token_t lparen;
|
@@ -12538,7 +13428,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12538
13428
|
|
12539
13429
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
12540
13430
|
if (token_is_setter_name(&name)) {
|
12541
|
-
|
13431
|
+
pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
|
12542
13432
|
}
|
12543
13433
|
equal = parser->previous;
|
12544
13434
|
|
@@ -12656,6 +13546,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12656
13546
|
parser_lex(parser);
|
12657
13547
|
pm_token_t for_keyword = parser->previous;
|
12658
13548
|
pm_node_t *index;
|
13549
|
+
pm_parser_scope_push_transparent(parser);
|
12659
13550
|
|
12660
13551
|
// First, parse out the first index expression.
|
12661
13552
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -12670,7 +13561,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12670
13561
|
} else if (token_begins_expression_p(parser->current.type)) {
|
12671
13562
|
index = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
12672
13563
|
} else {
|
12673
|
-
|
13564
|
+
pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
|
12674
13565
|
index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
|
12675
13566
|
}
|
12676
13567
|
|
@@ -12681,6 +13572,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12681
13572
|
index = parse_target(parser, index);
|
12682
13573
|
}
|
12683
13574
|
|
13575
|
+
pm_parser_scope_pop(parser);
|
12684
13576
|
pm_do_loop_stack_push(parser, true);
|
12685
13577
|
|
12686
13578
|
expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
|
@@ -12700,8 +13592,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12700
13592
|
pm_statements_node_t *statements = NULL;
|
12701
13593
|
|
12702
13594
|
if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
|
13595
|
+
pm_parser_scope_push_transparent(parser);
|
12703
13596
|
statements = parse_statements(parser, PM_CONTEXT_FOR);
|
12704
13597
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
|
13598
|
+
pm_parser_scope_pop(parser);
|
12705
13599
|
}
|
12706
13600
|
|
12707
13601
|
return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
|
@@ -12797,7 +13691,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12797
13691
|
// syntax error. We handle that here as well.
|
12798
13692
|
name = parser->previous;
|
12799
13693
|
if (name.type != PM_TOKEN_CONSTANT) {
|
12800
|
-
|
13694
|
+
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
12801
13695
|
}
|
12802
13696
|
|
12803
13697
|
pm_parser_scope_push(parser, true);
|
@@ -12821,7 +13715,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12821
13715
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
12822
13716
|
|
12823
13717
|
if (context_def_p(parser)) {
|
12824
|
-
|
13718
|
+
pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
|
12825
13719
|
}
|
12826
13720
|
|
12827
13721
|
return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
|
@@ -12891,13 +13785,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12891
13785
|
accept1(parser, PM_TOKEN_WORDS_SEP);
|
12892
13786
|
if (match1(parser, PM_TOKEN_STRING_END)) break;
|
12893
13787
|
|
12894
|
-
|
12895
|
-
|
12896
|
-
|
12897
|
-
|
13788
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13789
|
+
pm_token_t opening = not_provided(parser);
|
13790
|
+
pm_token_t closing = not_provided(parser);
|
13791
|
+
pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
|
13792
|
+
}
|
12898
13793
|
|
12899
|
-
|
12900
|
-
pm_array_node_elements_append(array, symbol);
|
13794
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
|
12901
13795
|
}
|
12902
13796
|
|
12903
13797
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
|
@@ -12937,26 +13831,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12937
13831
|
// If we hit content and the current node is NULL, then this is
|
12938
13832
|
// the first string content we've seen. In that case we're going
|
12939
13833
|
// to create a new string node and set that to the current.
|
13834
|
+
current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
|
12940
13835
|
parser_lex(parser);
|
12941
|
-
current = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
|
12942
13836
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
|
12943
13837
|
// If we hit string content and the current node is an
|
12944
13838
|
// interpolated string, then we need to append the string content
|
12945
13839
|
// to the list of child nodes.
|
12946
|
-
pm_node_t *
|
12947
|
-
|
13840
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
13841
|
+
parser_lex(parser);
|
13842
|
+
|
13843
|
+
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
|
12948
13844
|
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
|
12949
13845
|
// If we hit string content and the current node is a string node,
|
12950
13846
|
// then we need to convert the current node into an interpolated
|
12951
13847
|
// string and add the string content to the list of child nodes.
|
12952
|
-
|
12953
|
-
|
12954
|
-
pm_interpolated_symbol_node_t *interpolated =
|
12955
|
-
pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
12956
|
-
pm_interpolated_symbol_node_append(interpolated, current);
|
13848
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
|
13849
|
+
parser_lex(parser);
|
12957
13850
|
|
12958
|
-
|
12959
|
-
pm_interpolated_symbol_node_append(interpolated,
|
13851
|
+
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
13852
|
+
pm_interpolated_symbol_node_append(interpolated, current);
|
13853
|
+
pm_interpolated_symbol_node_append(interpolated, string);
|
12960
13854
|
current = (pm_node_t *) interpolated;
|
12961
13855
|
} else {
|
12962
13856
|
assert(false && "unreachable");
|
@@ -13063,12 +13957,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13063
13957
|
accept1(parser, PM_TOKEN_WORDS_SEP);
|
13064
13958
|
if (match1(parser, PM_TOKEN_STRING_END)) break;
|
13065
13959
|
|
13066
|
-
|
13960
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13961
|
+
pm_token_t opening = not_provided(parser);
|
13962
|
+
pm_token_t closing = not_provided(parser);
|
13067
13963
|
|
13068
|
-
|
13069
|
-
|
13070
|
-
|
13071
|
-
|
13964
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
13965
|
+
pm_array_node_elements_append(array, string);
|
13966
|
+
}
|
13967
|
+
|
13968
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
|
13072
13969
|
}
|
13073
13970
|
|
13074
13971
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
|
@@ -13101,29 +13998,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13101
13998
|
break;
|
13102
13999
|
}
|
13103
14000
|
case PM_TOKEN_STRING_CONTENT: {
|
14001
|
+
pm_token_t opening = not_provided(parser);
|
14002
|
+
pm_token_t closing = not_provided(parser);
|
14003
|
+
|
14004
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
14005
|
+
parser_lex(parser);
|
14006
|
+
|
13104
14007
|
if (current == NULL) {
|
13105
14008
|
// If we hit content and the current node is NULL, then this is
|
13106
14009
|
// the first string content we've seen. In that case we're going
|
13107
14010
|
// to create a new string node and set that to the current.
|
13108
|
-
current =
|
14011
|
+
current = string;
|
13109
14012
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
13110
14013
|
// If we hit string content and the current node is an
|
13111
14014
|
// interpolated string, then we need to append the string content
|
13112
14015
|
// to the list of child nodes.
|
13113
|
-
|
13114
|
-
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
14016
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
13115
14017
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
13116
14018
|
// If we hit string content and the current node is a string node,
|
13117
14019
|
// then we need to convert the current node into an interpolated
|
13118
14020
|
// string and add the string content to the list of child nodes.
|
13119
|
-
|
13120
|
-
pm_token_t closing = not_provided(parser);
|
13121
|
-
pm_interpolated_string_node_t *interpolated =
|
13122
|
-
pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
14021
|
+
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
13123
14022
|
pm_interpolated_string_node_append(interpolated, current);
|
13124
|
-
|
13125
|
-
pm_node_t *part = parse_string_part(parser);
|
13126
|
-
pm_interpolated_string_node_append(interpolated, part);
|
14023
|
+
pm_interpolated_string_node_append(interpolated, string);
|
13127
14024
|
current = (pm_node_t *) interpolated;
|
13128
14025
|
} else {
|
13129
14026
|
assert(false && "unreachable");
|
@@ -13218,7 +14115,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13218
14115
|
};
|
13219
14116
|
|
13220
14117
|
parser_lex(parser);
|
13221
|
-
return (pm_node_t *)
|
14118
|
+
return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
|
13222
14119
|
}
|
13223
14120
|
|
13224
14121
|
pm_interpolated_regular_expression_node_t *node;
|
@@ -13228,6 +14125,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13228
14125
|
// expression at least has something in it. We'll need to check if the
|
13229
14126
|
// following token is the end (in which case we can return a plain
|
13230
14127
|
// regular expression) or if it's not then it has interpolation.
|
14128
|
+
pm_string_t unescaped = parser->current_string;
|
13231
14129
|
pm_token_t content = parser->current;
|
13232
14130
|
parser_lex(parser);
|
13233
14131
|
|
@@ -13235,7 +14133,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13235
14133
|
// without interpolation, which can be represented more succinctly and
|
13236
14134
|
// more easily compiled.
|
13237
14135
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
13238
|
-
return (pm_node_t *)
|
14136
|
+
return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13239
14137
|
}
|
13240
14138
|
|
13241
14139
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -13244,7 +14142,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13244
14142
|
|
13245
14143
|
pm_token_t opening = not_provided(parser);
|
13246
14144
|
pm_token_t closing = not_provided(parser);
|
13247
|
-
pm_node_t *part = (pm_node_t *)
|
14145
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
13248
14146
|
pm_interpolated_regular_expression_node_append(node, part);
|
13249
14147
|
} else {
|
13250
14148
|
// If the first part of the body of the regular expression is not a
|
@@ -13255,9 +14153,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13255
14153
|
|
13256
14154
|
// Now that we're here and we have interpolation, we'll parse all of the
|
13257
14155
|
// parts into the list.
|
14156
|
+
pm_node_t *part;
|
13258
14157
|
while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
|
13259
|
-
|
13260
|
-
if (part != NULL) {
|
14158
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
13261
14159
|
pm_interpolated_regular_expression_node_append(node, part);
|
13262
14160
|
}
|
13263
14161
|
}
|
@@ -13293,35 +14191,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13293
14191
|
pm_interpolated_x_string_node_t *node;
|
13294
14192
|
|
13295
14193
|
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13296
|
-
// In this case we've hit string content so we know the string
|
13297
|
-
// has something in it. We'll need to check if the
|
13298
|
-
// the end (in which case we can return a
|
13299
|
-
// then it has interpolation.
|
14194
|
+
// In this case we've hit string content so we know the string
|
14195
|
+
// at least has something in it. We'll need to check if the
|
14196
|
+
// following token is the end (in which case we can return a
|
14197
|
+
// plain string) or if it's not then it has interpolation.
|
14198
|
+
pm_string_t unescaped = parser->current_string;
|
13300
14199
|
pm_token_t content = parser->current;
|
13301
14200
|
parser_lex(parser);
|
13302
14201
|
|
13303
14202
|
if (accept1(parser, PM_TOKEN_STRING_END)) {
|
13304
|
-
return (pm_node_t *)
|
14203
|
+
return (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13305
14204
|
}
|
13306
14205
|
|
13307
|
-
// If we get here, then we have interpolation so we'll need to
|
13308
|
-
// a string node with interpolation.
|
14206
|
+
// If we get here, then we have interpolation so we'll need to
|
14207
|
+
// create a string node with interpolation.
|
13309
14208
|
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
13310
14209
|
|
13311
14210
|
pm_token_t opening = not_provided(parser);
|
13312
14211
|
pm_token_t closing = not_provided(parser);
|
13313
|
-
pm_node_t *part = (pm_node_t *)
|
14212
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
14213
|
+
|
13314
14214
|
pm_interpolated_xstring_node_append(node, part);
|
13315
14215
|
} else {
|
13316
|
-
// If the first part of the body of the string is not a string
|
13317
|
-
// then we have interpolation and we need to create an
|
13318
|
-
// string node.
|
14216
|
+
// If the first part of the body of the string is not a string
|
14217
|
+
// content, then we have interpolation and we need to create an
|
14218
|
+
// interpolated string node.
|
13319
14219
|
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
13320
14220
|
}
|
13321
14221
|
|
14222
|
+
pm_node_t *part;
|
13322
14223
|
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13323
|
-
|
13324
|
-
if (part != NULL) {
|
14224
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
13325
14225
|
pm_interpolated_xstring_node_append(node, part);
|
13326
14226
|
}
|
13327
14227
|
}
|
@@ -13542,7 +14442,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
|
|
13542
14442
|
static void
|
13543
14443
|
parse_call_operator_write_block(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
|
13544
14444
|
if (call_node->block != NULL) {
|
13545
|
-
|
14445
|
+
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
|
13546
14446
|
pm_node_destroy(parser, (pm_node_t *) call_node->block);
|
13547
14447
|
call_node->block = NULL;
|
13548
14448
|
}
|
@@ -13590,7 +14490,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13590
14490
|
// In this case we have an = sign, but we don't know what it's for. We
|
13591
14491
|
// need to treat it as an error. For now, we'll mark it as an error
|
13592
14492
|
// and just skip right past it.
|
13593
|
-
|
14493
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
|
13594
14494
|
return node;
|
13595
14495
|
}
|
13596
14496
|
}
|
@@ -13598,7 +14498,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13598
14498
|
switch (PM_NODE_TYPE(node)) {
|
13599
14499
|
case PM_BACK_REFERENCE_READ_NODE:
|
13600
14500
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13601
|
-
|
14501
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13602
14502
|
/* fallthrough */
|
13603
14503
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13604
14504
|
parser_lex(parser);
|
@@ -13661,7 +14561,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13661
14561
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13662
14562
|
|
13663
14563
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13664
|
-
|
14564
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13665
14565
|
}
|
13666
14566
|
|
13667
14567
|
parser_lex(parser);
|
@@ -13683,7 +14583,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13683
14583
|
}
|
13684
14584
|
case PM_MULTI_WRITE_NODE: {
|
13685
14585
|
parser_lex(parser);
|
13686
|
-
|
14586
|
+
pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
|
13687
14587
|
return node;
|
13688
14588
|
}
|
13689
14589
|
default:
|
@@ -13692,7 +14592,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13692
14592
|
// In this case we have an &&= sign, but we don't know what it's for.
|
13693
14593
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13694
14594
|
// and just skip right past it.
|
13695
|
-
|
14595
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
13696
14596
|
return node;
|
13697
14597
|
}
|
13698
14598
|
}
|
@@ -13700,7 +14600,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13700
14600
|
switch (PM_NODE_TYPE(node)) {
|
13701
14601
|
case PM_BACK_REFERENCE_READ_NODE:
|
13702
14602
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13703
|
-
|
14603
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13704
14604
|
/* fallthrough */
|
13705
14605
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13706
14606
|
parser_lex(parser);
|
@@ -13763,7 +14663,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13763
14663
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13764
14664
|
|
13765
14665
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13766
|
-
|
14666
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13767
14667
|
}
|
13768
14668
|
|
13769
14669
|
parser_lex(parser);
|
@@ -13785,7 +14685,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13785
14685
|
}
|
13786
14686
|
case PM_MULTI_WRITE_NODE: {
|
13787
14687
|
parser_lex(parser);
|
13788
|
-
|
14688
|
+
pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
|
13789
14689
|
return node;
|
13790
14690
|
}
|
13791
14691
|
default:
|
@@ -13794,7 +14694,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13794
14694
|
// In this case we have an ||= sign, but we don't know what it's for.
|
13795
14695
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13796
14696
|
// and just skip right past it.
|
13797
|
-
|
14697
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
13798
14698
|
return node;
|
13799
14699
|
}
|
13800
14700
|
}
|
@@ -13812,7 +14712,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13812
14712
|
switch (PM_NODE_TYPE(node)) {
|
13813
14713
|
case PM_BACK_REFERENCE_READ_NODE:
|
13814
14714
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13815
|
-
|
14715
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13816
14716
|
/* fallthrough */
|
13817
14717
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13818
14718
|
parser_lex(parser);
|
@@ -13875,7 +14775,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13875
14775
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13876
14776
|
|
13877
14777
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13878
|
-
|
14778
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13879
14779
|
}
|
13880
14780
|
|
13881
14781
|
parser_lex(parser);
|
@@ -13897,7 +14797,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13897
14797
|
}
|
13898
14798
|
case PM_MULTI_WRITE_NODE: {
|
13899
14799
|
parser_lex(parser);
|
13900
|
-
|
14800
|
+
pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
|
13901
14801
|
return node;
|
13902
14802
|
}
|
13903
14803
|
default:
|
@@ -13906,7 +14806,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13906
14806
|
// In this case we have an operator but we don't know what it's for.
|
13907
14807
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13908
14808
|
// and just skip right past it.
|
13909
|
-
|
14809
|
+
pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
13910
14810
|
return node;
|
13911
14811
|
}
|
13912
14812
|
}
|
@@ -14021,7 +14921,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14021
14921
|
break;
|
14022
14922
|
}
|
14023
14923
|
default: {
|
14024
|
-
|
14924
|
+
pm_parser_err_current(parser, PM_ERR_DEF_NAME);
|
14025
14925
|
message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
|
14026
14926
|
}
|
14027
14927
|
}
|
@@ -14172,7 +15072,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14172
15072
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
14173
15073
|
}
|
14174
15074
|
default: {
|
14175
|
-
|
15075
|
+
pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
14176
15076
|
pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
|
14177
15077
|
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
14178
15078
|
}
|
@@ -14220,7 +15120,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14220
15120
|
|
14221
15121
|
if (block != NULL) {
|
14222
15122
|
if (arguments.block != NULL) {
|
14223
|
-
|
15123
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
|
14224
15124
|
if (arguments.arguments == NULL) {
|
14225
15125
|
arguments.arguments = pm_arguments_node_create(parser);
|
14226
15126
|
}
|
@@ -14283,7 +15183,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
|
|
14283
15183
|
// parse_expression_prefix is going to be a missing node. In that case we need
|
14284
15184
|
// to add the error message to the parser's error list.
|
14285
15185
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
14286
|
-
|
15186
|
+
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
14287
15187
|
return node;
|
14288
15188
|
}
|
14289
15189
|
|
@@ -14428,6 +15328,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
|
|
14428
15328
|
.next_start = NULL,
|
14429
15329
|
.heredoc_end = NULL,
|
14430
15330
|
.comment_list = PM_LIST_EMPTY,
|
15331
|
+
.magic_comment_list = PM_LIST_EMPTY,
|
14431
15332
|
.warning_list = PM_LIST_EMPTY,
|
14432
15333
|
.error_list = PM_LIST_EMPTY,
|
14433
15334
|
.current_scope = NULL,
|
@@ -14441,6 +15342,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
|
|
14441
15342
|
.constant_pool = PM_CONSTANT_POOL_EMPTY,
|
14442
15343
|
.newline_list = PM_NEWLINE_LIST_EMPTY,
|
14443
15344
|
.integer_base = 0,
|
15345
|
+
.current_string = PM_EMPTY_STRING,
|
14444
15346
|
.command_start = true,
|
14445
15347
|
.recovering = false,
|
14446
15348
|
.encoding_changed = false,
|
@@ -14521,6 +15423,19 @@ pm_comment_list_free(pm_list_t *list) {
|
|
14521
15423
|
}
|
14522
15424
|
}
|
14523
15425
|
|
15426
|
+
// Free all of the memory associated with the magic comment list.
|
15427
|
+
static inline void
|
15428
|
+
pm_magic_comment_list_free(pm_list_t *list) {
|
15429
|
+
pm_list_node_t *node, *next;
|
15430
|
+
|
15431
|
+
for (node = list->head; node != NULL; node = next) {
|
15432
|
+
next = node->next;
|
15433
|
+
|
15434
|
+
pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
|
15435
|
+
free(magic_comment);
|
15436
|
+
}
|
15437
|
+
}
|
15438
|
+
|
14524
15439
|
// Free any memory associated with the given parser.
|
14525
15440
|
PRISM_EXPORTED_FUNCTION void
|
14526
15441
|
pm_parser_free(pm_parser_t *parser) {
|
@@ -14528,6 +15443,7 @@ pm_parser_free(pm_parser_t *parser) {
|
|
14528
15443
|
pm_diagnostic_list_free(&parser->error_list);
|
14529
15444
|
pm_diagnostic_list_free(&parser->warning_list);
|
14530
15445
|
pm_comment_list_free(&parser->comment_list);
|
15446
|
+
pm_magic_comment_list_free(&parser->magic_comment_list);
|
14531
15447
|
pm_constant_pool_free(&parser->constant_pool);
|
14532
15448
|
pm_newline_list_free(&parser->newline_list);
|
14533
15449
|
|
@@ -14578,10 +15494,11 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
|
|
14578
15494
|
pm_parser_free(&parser);
|
14579
15495
|
}
|
14580
15496
|
|
14581
|
-
#undef PM_LOCATION_NULL_VALUE
|
14582
|
-
#undef PM_LOCATION_TOKEN_VALUE
|
14583
|
-
#undef PM_LOCATION_NODE_VALUE
|
14584
|
-
#undef PM_LOCATION_NODE_BASE_VALUE
|
14585
15497
|
#undef PM_CASE_KEYWORD
|
14586
15498
|
#undef PM_CASE_OPERATOR
|
14587
15499
|
#undef PM_CASE_WRITABLE
|
15500
|
+
#undef PM_EMPTY_STRING
|
15501
|
+
#undef PM_LOCATION_NODE_BASE_VALUE
|
15502
|
+
#undef PM_LOCATION_NODE_VALUE
|
15503
|
+
#undef PM_LOCATION_NULL_VALUE
|
15504
|
+
#undef PM_LOCATION_TOKEN_VALUE
|