prism 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/src/prism.c
CHANGED
@@ -421,6 +421,63 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
|
|
421
421
|
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
|
422
422
|
#endif
|
423
423
|
|
424
|
+
/******************************************************************************/
|
425
|
+
/* Diagnostic-related functions */
|
426
|
+
/******************************************************************************/
|
427
|
+
|
428
|
+
// Append an error to the list of errors on the parser.
|
429
|
+
static inline void
|
430
|
+
pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
431
|
+
pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
|
432
|
+
}
|
433
|
+
|
434
|
+
// Append an error to the list of errors on the parser using the location of the
|
435
|
+
// current token.
|
436
|
+
static inline void
|
437
|
+
pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
438
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
|
439
|
+
}
|
440
|
+
|
441
|
+
// Append an error to the list of errors on the parser using the given location.
|
442
|
+
static inline void
|
443
|
+
pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) {
|
444
|
+
pm_parser_err(parser, location->start, location->end, diag_id);
|
445
|
+
}
|
446
|
+
|
447
|
+
// Append an error to the list of errors on the parser using the location of the
|
448
|
+
// given node.
|
449
|
+
static inline void
|
450
|
+
pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
|
451
|
+
pm_parser_err(parser, node->location.start, node->location.end, diag_id);
|
452
|
+
}
|
453
|
+
|
454
|
+
// Append an error to the list of errors on the parser using the location of the
|
455
|
+
// previous token.
|
456
|
+
static inline void
|
457
|
+
pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
458
|
+
pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
|
459
|
+
}
|
460
|
+
|
461
|
+
// Append an error to the list of errors on the parser using the location of the
|
462
|
+
// given token.
|
463
|
+
static inline void
|
464
|
+
pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
|
465
|
+
pm_parser_err(parser, token->start, token->end, diag_id);
|
466
|
+
}
|
467
|
+
|
468
|
+
// Append a warning to the list of warnings on the parser.
|
469
|
+
static inline void
|
470
|
+
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
471
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
472
|
+
}
|
473
|
+
|
474
|
+
// Append a warning to the list of warnings on the parser using the location of
|
475
|
+
// the given token.
|
476
|
+
static inline void
|
477
|
+
pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
|
478
|
+
pm_parser_warn(parser, token->start, token->end, diag_id);
|
479
|
+
}
|
480
|
+
|
424
481
|
/******************************************************************************/
|
425
482
|
/* Node-related functions */
|
426
483
|
/******************************************************************************/
|
@@ -437,6 +494,22 @@ pm_parser_constant_id_owned(pm_parser_t *parser, const uint8_t *start, size_t le
|
|
437
494
|
return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
|
438
495
|
}
|
439
496
|
|
497
|
+
// Retrieve the constant pool id for the given static literal C string.
|
498
|
+
static inline pm_constant_id_t
|
499
|
+
pm_parser_constant_id_static(pm_parser_t *parser, const char *start, size_t length) {
|
500
|
+
uint8_t *owned_copy;
|
501
|
+
if (length > 0) {
|
502
|
+
owned_copy = malloc(length);
|
503
|
+
memcpy(owned_copy, start, length);
|
504
|
+
} else {
|
505
|
+
owned_copy = malloc(1);
|
506
|
+
owned_copy[0] = '\0';
|
507
|
+
}
|
508
|
+
return pm_constant_pool_insert_owned(&parser->constant_pool, owned_copy, length);
|
509
|
+
// Does not work because the static literal cannot be serialized as an offset of source
|
510
|
+
// return pm_constant_pool_insert_shared(&parser->constant_pool, start, length);
|
511
|
+
}
|
512
|
+
|
440
513
|
// Retrieve the constant pool id for the given token.
|
441
514
|
static inline pm_constant_id_t
|
442
515
|
pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -582,12 +655,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
582
655
|
|
583
656
|
// If we didn't hit a case before this check, then at this point we need to
|
584
657
|
// add a syntax error.
|
585
|
-
|
586
|
-
&parser->error_list,
|
587
|
-
block->base.location.start,
|
588
|
-
block->base.location.end,
|
589
|
-
PM_ERR_ARGUMENT_UNEXPECTED_BLOCK
|
590
|
-
);
|
658
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
591
659
|
}
|
592
660
|
|
593
661
|
/******************************************************************************/
|
@@ -601,6 +669,7 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
|
|
601
669
|
scope->base.location.start = node->location.start;
|
602
670
|
scope->base.location.end = node->location.end;
|
603
671
|
|
672
|
+
scope->ast_node = node;
|
604
673
|
scope->parameters = NULL;
|
605
674
|
scope->body = NULL;
|
606
675
|
pm_constant_id_list_init(&scope->locals);
|
@@ -626,6 +695,11 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
|
|
626
695
|
scope->locals = cast->locals;
|
627
696
|
break;
|
628
697
|
}
|
698
|
+
case PM_FOR_NODE: {
|
699
|
+
pm_for_node_t *cast = (pm_for_node_t *)node;
|
700
|
+
scope->body = (pm_node_t *)cast->statements;
|
701
|
+
break;
|
702
|
+
}
|
629
703
|
case PM_LAMBDA_NODE: {
|
630
704
|
pm_lambda_node_t *cast = (pm_lambda_node_t *) node;
|
631
705
|
if (cast->parameters) scope->parameters = cast->parameters->parameters;
|
@@ -679,14 +753,14 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
|
|
679
753
|
unsigned long value = strtoul(digits, &endptr, 10);
|
680
754
|
|
681
755
|
if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
|
682
|
-
|
756
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
|
683
757
|
value = UINT32_MAX;
|
684
758
|
}
|
685
759
|
|
686
760
|
free(digits);
|
687
761
|
|
688
762
|
if (value > UINT32_MAX) {
|
689
|
-
|
763
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
|
690
764
|
value = UINT32_MAX;
|
691
765
|
}
|
692
766
|
|
@@ -907,7 +981,7 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
|
|
907
981
|
|
908
982
|
// If the element is not a static literal, then the array is not a static
|
909
983
|
// literal. Turn that flag off.
|
910
|
-
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
|
984
|
+
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
|
911
985
|
node->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
|
912
986
|
}
|
913
987
|
}
|
@@ -1051,8 +1125,10 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1051
1125
|
end = key->location.end;
|
1052
1126
|
}
|
1053
1127
|
|
1128
|
+
// If the key and value of this assoc node are both static literals, then
|
1129
|
+
// we can mark this node as a static literal.
|
1054
1130
|
pm_node_flags_t flags = 0;
|
1055
|
-
if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE)) {
|
1131
|
+
if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
|
1056
1132
|
flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
|
1057
1133
|
}
|
1058
1134
|
|
@@ -1341,7 +1417,8 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1341
1417
|
.opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
1342
1418
|
.arguments = NULL,
|
1343
1419
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
1344
|
-
.block = NULL
|
1420
|
+
.block = NULL,
|
1421
|
+
.name = 0
|
1345
1422
|
};
|
1346
1423
|
|
1347
1424
|
return node;
|
@@ -1369,7 +1446,7 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
|
|
1369
1446
|
node->closing_loc = arguments->closing_loc;
|
1370
1447
|
node->block = arguments->block;
|
1371
1448
|
|
1372
|
-
|
1449
|
+
node->name = pm_parser_constant_id_static(parser, "[]", 2);
|
1373
1450
|
return node;
|
1374
1451
|
}
|
1375
1452
|
|
@@ -1388,7 +1465,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1388
1465
|
pm_arguments_node_arguments_append(arguments, argument);
|
1389
1466
|
node->arguments = arguments;
|
1390
1467
|
|
1391
|
-
|
1468
|
+
node->name = pm_parser_constant_id_token(parser, operator);
|
1392
1469
|
return node;
|
1393
1470
|
}
|
1394
1471
|
|
@@ -1420,7 +1497,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1420
1497
|
node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
|
1421
1498
|
}
|
1422
1499
|
|
1423
|
-
|
1500
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1424
1501
|
return node;
|
1425
1502
|
}
|
1426
1503
|
|
@@ -1447,7 +1524,7 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
|
|
1447
1524
|
node->closing_loc = arguments->closing_loc;
|
1448
1525
|
node->block = arguments->block;
|
1449
1526
|
|
1450
|
-
|
1527
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1451
1528
|
return node;
|
1452
1529
|
}
|
1453
1530
|
|
@@ -1469,7 +1546,7 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
|
|
1469
1546
|
node->arguments = arguments->arguments;
|
1470
1547
|
node->closing_loc = arguments->closing_loc;
|
1471
1548
|
|
1472
|
-
|
1549
|
+
node->name = pm_parser_constant_id_static(parser, "!", 1);
|
1473
1550
|
return node;
|
1474
1551
|
}
|
1475
1552
|
|
@@ -1496,7 +1573,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
|
|
1496
1573
|
node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
|
1497
1574
|
}
|
1498
1575
|
|
1499
|
-
|
1576
|
+
node->name = pm_parser_constant_id_static(parser, "call", 4);
|
1500
1577
|
return node;
|
1501
1578
|
}
|
1502
1579
|
|
@@ -1511,7 +1588,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1511
1588
|
node->receiver = receiver;
|
1512
1589
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
|
1513
1590
|
|
1514
|
-
|
1591
|
+
node->name = pm_parser_constant_id_static(parser, name, strlen(name));
|
1515
1592
|
return node;
|
1516
1593
|
}
|
1517
1594
|
|
@@ -1524,7 +1601,7 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
|
1524
1601
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1525
1602
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
1526
1603
|
|
1527
|
-
|
1604
|
+
node->name = pm_parser_constant_id_token(parser, message);
|
1528
1605
|
return node;
|
1529
1606
|
}
|
1530
1607
|
|
@@ -1537,17 +1614,18 @@ pm_call_node_variable_call_p(pm_call_node_t *node) {
|
|
1537
1614
|
|
1538
1615
|
// Initialize the read name by reading the write name and chopping off the '='.
|
1539
1616
|
static void
|
1540
|
-
pm_call_write_read_name_init(
|
1541
|
-
|
1542
|
-
|
1617
|
+
pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
|
1618
|
+
pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
|
1619
|
+
if (write_constant->length >= 1) {
|
1620
|
+
size_t length = write_constant->length - 1;
|
1543
1621
|
|
1544
1622
|
void *memory = malloc(length);
|
1545
|
-
memcpy(memory,
|
1623
|
+
memcpy(memory, write_constant->start, length);
|
1546
1624
|
|
1547
|
-
|
1625
|
+
*read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
|
1548
1626
|
} else {
|
1549
1627
|
// We can get here if the message was missing because of a syntax error.
|
1550
|
-
|
1628
|
+
*read_name = pm_parser_constant_id_static(parser, "", 0);
|
1551
1629
|
}
|
1552
1630
|
}
|
1553
1631
|
|
@@ -1573,13 +1651,13 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
1573
1651
|
.opening_loc = target->opening_loc,
|
1574
1652
|
.arguments = target->arguments,
|
1575
1653
|
.closing_loc = target->closing_loc,
|
1576
|
-
.read_name =
|
1654
|
+
.read_name = 0,
|
1577
1655
|
.write_name = target->name,
|
1578
1656
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1579
1657
|
.value = value
|
1580
1658
|
};
|
1581
1659
|
|
1582
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1660
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1583
1661
|
|
1584
1662
|
// Here we're going to free the target, since it is no longer necessary.
|
1585
1663
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -1610,14 +1688,14 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
1610
1688
|
.opening_loc = target->opening_loc,
|
1611
1689
|
.arguments = target->arguments,
|
1612
1690
|
.closing_loc = target->closing_loc,
|
1613
|
-
.read_name =
|
1691
|
+
.read_name = 0,
|
1614
1692
|
.write_name = target->name,
|
1615
1693
|
.operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
1616
1694
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1617
1695
|
.value = value
|
1618
1696
|
};
|
1619
1697
|
|
1620
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1698
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1621
1699
|
|
1622
1700
|
// Here we're going to free the target, since it is no longer necessary.
|
1623
1701
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -1649,13 +1727,13 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
1649
1727
|
.opening_loc = target->opening_loc,
|
1650
1728
|
.arguments = target->arguments,
|
1651
1729
|
.closing_loc = target->closing_loc,
|
1652
|
-
.read_name =
|
1730
|
+
.read_name = 0,
|
1653
1731
|
.write_name = target->name,
|
1654
1732
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
1655
1733
|
.value = value
|
1656
1734
|
};
|
1657
1735
|
|
1658
|
-
pm_call_write_read_name_init(&node->read_name, &node->write_name);
|
1736
|
+
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
|
1659
1737
|
|
1660
1738
|
// Here we're going to free the target, since it is no longer necessary.
|
1661
1739
|
// However, we don't want to call `pm_node_destroy` because we want to keep
|
@@ -3372,11 +3450,20 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
3372
3450
|
return node;
|
3373
3451
|
}
|
3374
3452
|
|
3453
|
+
// Returns true if the bytes between start and end look like a numbered
// parameter: exactly two bytes, an underscore followed by a character other
// than '0' that pm_char_is_decimal_digit accepts.
static inline bool
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
|
3457
|
+
|
3375
3458
|
// Allocate and initialize a new LocalVariableTargetNode node.
|
3376
3459
|
static pm_local_variable_target_node_t *
|
3377
3460
|
pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
3378
3461
|
pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
|
3379
3462
|
|
3463
|
+
if (token_is_numbered_parameter(name->start, name->end)) {
|
3464
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
3465
|
+
}
|
3466
|
+
|
3380
3467
|
*node = (pm_local_variable_target_node_t) {
|
3381
3468
|
{
|
3382
3469
|
.type = PM_LOCAL_VARIABLE_TARGET_NODE,
|
@@ -3870,10 +3957,27 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con
|
|
3870
3957
|
static pm_range_node_t *
|
3871
3958
|
pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
|
3872
3959
|
pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
|
3960
|
+
pm_node_flags_t flags = 0;
|
3961
|
+
|
3962
|
+
// Indicate that this node is an exclusive range if the operator is `...`.
|
3963
|
+
if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
|
3964
|
+
flags |= PM_RANGE_FLAGS_EXCLUDE_END;
|
3965
|
+
}
|
3966
|
+
|
3967
|
+
// Indicate that this node is a static literal (i.e., can be compiled with
|
3968
|
+
// a putobject in CRuby) if the left and right are implicit nil, explicit
|
3969
|
+
// nil, or integers.
|
3970
|
+
if (
|
3971
|
+
(left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
|
3972
|
+
(right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
|
3973
|
+
) {
|
3974
|
+
flags |= PM_NODE_FLAG_STATIC_LITERAL;
|
3975
|
+
}
|
3873
3976
|
|
3874
3977
|
*node = (pm_range_node_t) {
|
3875
3978
|
{
|
3876
3979
|
.type = PM_RANGE_NODE,
|
3980
|
+
.flags = flags,
|
3877
3981
|
.location = {
|
3878
3982
|
.start = (left == NULL ? operator->start : left->location.start),
|
3879
3983
|
.end = (right == NULL ? operator->end : right->location.end)
|
@@ -3884,15 +3988,6 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
|
|
3884
3988
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
|
3885
3989
|
};
|
3886
3990
|
|
3887
|
-
switch (operator->type) {
|
3888
|
-
case PM_TOKEN_DOT_DOT_DOT:
|
3889
|
-
case PM_TOKEN_UDOT_DOT_DOT:
|
3890
|
-
node->base.flags |= PM_RANGE_FLAGS_EXCLUDE_END;
|
3891
|
-
break;
|
3892
|
-
default:
|
3893
|
-
break;
|
3894
|
-
}
|
3895
|
-
|
3896
3991
|
return node;
|
3897
3992
|
}
|
3898
3993
|
|
@@ -3906,9 +4001,10 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
3906
4001
|
return node;
|
3907
4002
|
}
|
3908
4003
|
|
3909
|
-
// Allocate a new RegularExpressionNode node
|
4004
|
+
// Allocate and initialize a new RegularExpressionNode node with the given
|
4005
|
+
// unescaped string.
|
3910
4006
|
static pm_regular_expression_node_t *
|
3911
|
-
|
4007
|
+
pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
|
3912
4008
|
pm_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_regular_expression_node_t);
|
3913
4009
|
|
3914
4010
|
*node = (pm_regular_expression_node_t) {
|
@@ -3923,12 +4019,18 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
|
|
3923
4019
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
3924
4020
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
3925
4021
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
3926
|
-
.unescaped =
|
4022
|
+
.unescaped = *unescaped
|
3927
4023
|
};
|
3928
4024
|
|
3929
4025
|
return node;
|
3930
4026
|
}
|
3931
4027
|
|
4028
|
+
// Allocate a new initialize a new RegularExpressionNode node.
|
4029
|
+
static inline pm_regular_expression_node_t *
|
4030
|
+
pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4031
|
+
return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4032
|
+
}
|
4033
|
+
|
3932
4034
|
// Allocate a new RequiredDestructuredParameterNode node.
|
3933
4035
|
static pm_required_destructured_parameter_node_t *
|
3934
4036
|
pm_required_destructured_parameter_node_create(pm_parser_t *parser, const pm_token_t *opening) {
|
@@ -4274,9 +4376,9 @@ pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *ri
|
|
4274
4376
|
return node;
|
4275
4377
|
}
|
4276
4378
|
|
4277
|
-
// Allocate a new StringNode node.
|
4278
|
-
static pm_string_node_t *
|
4279
|
-
|
4379
|
+
// Allocate and initialize a new StringNode node with the given unescaped string.
|
4380
|
+
static inline pm_string_node_t *
|
4381
|
+
pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
|
4280
4382
|
pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
|
4281
4383
|
pm_node_flags_t flags = 0;
|
4282
4384
|
|
@@ -4296,12 +4398,27 @@ pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
4296
4398
|
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
4297
4399
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
4298
4400
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
4299
|
-
.unescaped =
|
4401
|
+
.unescaped = *string
|
4300
4402
|
};
|
4301
4403
|
|
4302
4404
|
return node;
|
4303
4405
|
}
|
4304
4406
|
|
4407
|
+
// Allocate a new StringNode node.
|
4408
|
+
static pm_string_node_t *
|
4409
|
+
pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4410
|
+
return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4411
|
+
}
|
4412
|
+
|
4413
|
+
// Allocate a new StringNode node and create it using the current string on the
|
4414
|
+
// parser.
|
4415
|
+
static pm_string_node_t *
|
4416
|
+
pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4417
|
+
pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
|
4418
|
+
parser->current_string = PM_EMPTY_STRING;
|
4419
|
+
return node;
|
4420
|
+
}
|
4421
|
+
|
4305
4422
|
// Allocate and initialize a new SuperNode node.
|
4306
4423
|
static pm_super_node_t *
|
4307
4424
|
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
|
@@ -4338,9 +4455,10 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
4338
4455
|
return node;
|
4339
4456
|
}
|
4340
4457
|
|
4341
|
-
// Allocate a new SymbolNode node
|
4458
|
+
// Allocate and initialize a new SymbolNode node with the given unescaped
|
4459
|
+
// string.
|
4342
4460
|
static pm_symbol_node_t *
|
4343
|
-
|
4461
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
4344
4462
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
4345
4463
|
|
4346
4464
|
*node = (pm_symbol_node_t) {
|
@@ -4355,12 +4473,26 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
4355
4473
|
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
4356
4474
|
.value_loc = PM_LOCATION_TOKEN_VALUE(value),
|
4357
4475
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
4358
|
-
.unescaped =
|
4476
|
+
.unescaped = *unescaped
|
4359
4477
|
};
|
4360
4478
|
|
4361
4479
|
return node;
|
4362
4480
|
}
|
4363
4481
|
|
4482
|
+
// Allocate and initialize a new SymbolNode node.
|
4483
|
+
static inline pm_symbol_node_t *
|
4484
|
+
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
4485
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_EMPTY_STRING);
|
4486
|
+
}
|
4487
|
+
|
4488
|
+
// Allocate and initialize a new SymbolNode node with the current string.
|
4489
|
+
static pm_symbol_node_t *
|
4490
|
+
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
4491
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
4492
|
+
parser->current_string = PM_EMPTY_STRING;
|
4493
|
+
return node;
|
4494
|
+
}
|
4495
|
+
|
4364
4496
|
// Allocate and initialize a new SymbolNode node from a label.
|
4365
4497
|
static pm_symbol_node_t *
|
4366
4498
|
pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4376,8 +4508,6 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4376
4508
|
|
4377
4509
|
assert((label.end - label.start) >= 0);
|
4378
4510
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
4379
|
-
|
4380
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
|
4381
4511
|
break;
|
4382
4512
|
}
|
4383
4513
|
case PM_TOKEN_MISSING: {
|
@@ -4710,9 +4840,10 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
4710
4840
|
return node;
|
4711
4841
|
}
|
4712
4842
|
|
4713
|
-
// Allocate and initialize a new XStringNode node
|
4843
|
+
// Allocate and initialize a new XStringNode node with the given unescaped
|
4844
|
+
// string.
|
4714
4845
|
static pm_x_string_node_t *
|
4715
|
-
|
4846
|
+
pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
|
4716
4847
|
pm_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_x_string_node_t);
|
4717
4848
|
|
4718
4849
|
*node = (pm_x_string_node_t) {
|
@@ -4726,12 +4857,18 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
|
|
4726
4857
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4727
4858
|
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
|
4728
4859
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
4729
|
-
.unescaped =
|
4860
|
+
.unescaped = *unescaped
|
4730
4861
|
};
|
4731
4862
|
|
4732
4863
|
return node;
|
4733
4864
|
}
|
4734
4865
|
|
4866
|
+
// Allocate and initialize a new XStringNode node.
|
4867
|
+
static inline pm_x_string_node_t *
|
4868
|
+
pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
4869
|
+
return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
|
4870
|
+
}
|
4871
|
+
|
4735
4872
|
// Allocate a new YieldNode node.
|
4736
4873
|
static pm_yield_node_t *
|
4737
4874
|
pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
|
@@ -4765,8 +4902,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
|
|
4765
4902
|
return node;
|
4766
4903
|
}
|
4767
4904
|
|
4768
|
-
|
4769
|
-
#undef PM_EMPTY_STRING
|
4770
4905
|
#undef PM_ALLOC_NODE
|
4771
4906
|
|
4772
4907
|
/******************************************************************************/
|
@@ -4783,7 +4918,8 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
4783
4918
|
.previous = parser->current_scope,
|
4784
4919
|
.closed = closed,
|
4785
4920
|
.explicit_params = false,
|
4786
|
-
.numbered_params = false
|
4921
|
+
.numbered_params = false,
|
4922
|
+
.transparent = false
|
4787
4923
|
};
|
4788
4924
|
|
4789
4925
|
pm_constant_id_list_init(&scope->locals);
|
@@ -4792,6 +4928,25 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
4792
4928
|
return true;
|
4793
4929
|
}
|
4794
4930
|
|
4931
|
+
// Allocate and initialize a new scope. Push it onto the scope stack.
|
4932
|
+
static bool
|
4933
|
+
pm_parser_scope_push_transparent(pm_parser_t *parser) {
|
4934
|
+
pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
|
4935
|
+
if (scope == NULL) return false;
|
4936
|
+
|
4937
|
+
*scope = (pm_scope_t) {
|
4938
|
+
.previous = parser->current_scope,
|
4939
|
+
.closed = false,
|
4940
|
+
.explicit_params = false,
|
4941
|
+
.numbered_params = false,
|
4942
|
+
.transparent = true
|
4943
|
+
};
|
4944
|
+
|
4945
|
+
parser->current_scope = scope;
|
4946
|
+
|
4947
|
+
return true;
|
4948
|
+
}
|
4949
|
+
|
4795
4950
|
// Check if the current scope has a given local variable.
|
4796
4951
|
static int
|
4797
4952
|
pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
@@ -4800,7 +4955,8 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
|
4800
4955
|
int depth = 0;
|
4801
4956
|
|
4802
4957
|
while (scope != NULL) {
|
4803
|
-
if (
|
4958
|
+
if (!scope->transparent &&
|
4959
|
+
pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
|
4804
4960
|
if (scope->closed) break;
|
4805
4961
|
|
4806
4962
|
scope = scope->previous;
|
@@ -4813,8 +4969,12 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
|
|
4813
4969
|
// Add a constant id to the local table of the current scope.
|
4814
4970
|
static inline void
|
4815
4971
|
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
|
4816
|
-
|
4817
|
-
|
4972
|
+
pm_scope_t *scope = parser->current_scope;
|
4973
|
+
while (scope && scope->transparent) scope = scope->previous;
|
4974
|
+
|
4975
|
+
assert(scope != NULL);
|
4976
|
+
if (!pm_constant_id_list_includes(&scope->locals, constant_id)) {
|
4977
|
+
pm_constant_id_list_append(&scope->locals, constant_id);
|
4818
4978
|
}
|
4819
4979
|
}
|
4820
4980
|
|
@@ -4839,18 +4999,13 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
4839
4999
|
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
4840
5000
|
}
|
4841
5001
|
|
4842
|
-
static inline bool
|
4843
|
-
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
|
4844
|
-
return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
|
4845
|
-
}
|
4846
|
-
|
4847
5002
|
// Add a parameter name to the current scope and check whether the name of the
|
4848
5003
|
// parameter is unique or not.
|
4849
5004
|
static void
|
4850
|
-
pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
|
5005
|
+
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
4851
5006
|
// We want to check whether the parameter name is a numbered parameter or not.
|
4852
5007
|
if (token_is_numbered_parameter(name->start, name->end)) {
|
4853
|
-
|
5008
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
4854
5009
|
}
|
4855
5010
|
|
4856
5011
|
// We want to ignore any parameter name that starts with an underscore.
|
@@ -4861,7 +5016,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
|
|
4861
5016
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
4862
5017
|
|
4863
5018
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
4864
|
-
|
5019
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
4865
5020
|
}
|
4866
5021
|
}
|
4867
5022
|
|
@@ -5007,17 +5162,6 @@ peek(pm_parser_t *parser) {
|
|
5007
5162
|
return peek_at(parser, parser->current.end);
|
5008
5163
|
}
|
5009
5164
|
|
5010
|
-
// Get the next string of length len in the source starting from parser->current.end.
|
5011
|
-
// If the string extends beyond the end of the source, return the empty string ""
|
5012
|
-
static inline const uint8_t *
|
5013
|
-
peek_string(pm_parser_t *parser, size_t len) {
|
5014
|
-
if (parser->current.end + len <= parser->end) {
|
5015
|
-
return parser->current.end;
|
5016
|
-
} else {
|
5017
|
-
return (const uint8_t *) "";
|
5018
|
-
}
|
5019
|
-
}
|
5020
|
-
|
5021
5165
|
// If the character to be read matches the given value, then returns true and
|
5022
5166
|
// advances the current pointer.
|
5023
5167
|
static inline bool
|
@@ -5069,66 +5213,17 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
5069
5213
|
return memchr(cursor, '\n', (size_t) length);
|
5070
5214
|
}
|
5071
5215
|
|
5072
|
-
// Find the start of the encoding comment. This is effectively an inlined
|
5073
|
-
// version of strnstr with some modifications.
|
5074
|
-
static inline const uint8_t *
|
5075
|
-
parser_lex_encoding_comment_start(pm_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
|
5076
|
-
assert(remaining >= 0);
|
5077
|
-
size_t length = (size_t) remaining;
|
5078
|
-
|
5079
|
-
size_t key_length = strlen("coding:");
|
5080
|
-
if (key_length > length) return NULL;
|
5081
|
-
|
5082
|
-
const uint8_t *cursor_limit = cursor + length - key_length + 1;
|
5083
|
-
while ((cursor = pm_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5084
|
-
if (memcmp(cursor, "coding", key_length - 1) == 0) {
|
5085
|
-
size_t whitespace_after_coding = pm_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
|
5086
|
-
size_t cur_pos = key_length + whitespace_after_coding;
|
5087
|
-
|
5088
|
-
if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
|
5089
|
-
return cursor + cur_pos;
|
5090
|
-
}
|
5091
|
-
}
|
5092
|
-
|
5093
|
-
cursor++;
|
5094
|
-
}
|
5095
|
-
|
5096
|
-
return NULL;
|
5097
|
-
}
|
5098
|
-
|
5099
5216
|
// Here we're going to check if this is a "magic" comment, and perform whatever
|
5100
5217
|
// actions are necessary for it here.
|
5101
5218
|
static void
|
5102
|
-
|
5103
|
-
|
5104
|
-
const uint8_t *end = parser->current.end;
|
5105
|
-
|
5106
|
-
// These are the patterns we're going to match to find the encoding comment.
|
5107
|
-
// This is definitely not complete or even really correct.
|
5108
|
-
const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
|
5109
|
-
|
5110
|
-
// If we didn't find anything that matched our patterns, then return. Note
|
5111
|
-
// that this does a _very_ poor job of actually finding the encoding, and
|
5112
|
-
// there is a lot of work to do here to better reflect actual magic comment
|
5113
|
-
// parsing from CRuby, but this at least gets us part of the way there.
|
5114
|
-
if (encoding_start == NULL) return;
|
5115
|
-
|
5116
|
-
// Skip any non-newline whitespace after the "coding:" or "coding=".
|
5117
|
-
encoding_start += pm_strspn_inline_whitespace(encoding_start, end - encoding_start);
|
5118
|
-
|
5119
|
-
// Now determine the end of the encoding string. This is either the end of
|
5120
|
-
// the line, the first whitespace character, or a punctuation mark.
|
5121
|
-
const uint8_t *encoding_end = pm_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
|
5122
|
-
encoding_end = encoding_end == NULL ? end : encoding_end;
|
5123
|
-
|
5124
|
-
// Finally, we can determine the width of the encoding string.
|
5125
|
-
size_t width = (size_t) (encoding_end - encoding_start);
|
5219
|
+
parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
5220
|
+
size_t width = (size_t) (end - start);
|
5126
5221
|
|
5127
5222
|
// First, we're going to call out to a user-defined callback if one was
|
5128
5223
|
// provided. If they return an encoding struct that we can use, then we'll
|
5129
5224
|
// use that here.
|
5130
5225
|
if (parser->encoding_decode_callback != NULL) {
|
5131
|
-
pm_encoding_t *encoding = parser->encoding_decode_callback(parser,
|
5226
|
+
pm_encoding_t *encoding = parser->encoding_decode_callback(parser, start, width);
|
5132
5227
|
|
5133
5228
|
if (encoding != NULL) {
|
5134
5229
|
parser->encoding = *encoding;
|
@@ -5140,7 +5235,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5140
5235
|
// Extensions like utf-8 can contain extra encoding details like,
|
5141
5236
|
// utf-8-dos, utf-8-linux, utf-8-mac. We treat all of these as utf-8, so any
|
5142
5237
|
// encoding that starts with utf-8 is treated as utf-8.
|
5143
|
-
if ((
|
5238
|
+
if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
|
5144
5239
|
// We don't need to do anything here because the default encoding is
|
5145
5240
|
// already UTF-8. We'll just return.
|
5146
5241
|
return;
|
@@ -5149,7 +5244,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5149
5244
|
// Next, we're going to loop through each of the encodings that we handle
|
5150
5245
|
// explicitly. If we found one that we understand, we'll use that value.
|
5151
5246
|
#define ENCODING(value, prebuilt) \
|
5152
|
-
if (width == sizeof(value) - 1 &&
|
5247
|
+
if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
|
5153
5248
|
parser->encoding = prebuilt; \
|
5154
5249
|
parser->encoding_changed |= true; \
|
5155
5250
|
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
|
@@ -5198,40 +5293,220 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
|
|
5198
5293
|
// didn't understand the encoding that the user was trying to use. In this
|
5199
5294
|
// case we'll keep using the default encoding but add an error to the
|
5200
5295
|
// parser to indicate an unsuccessful parse.
|
5201
|
-
|
5296
|
+
pm_parser_err(parser, start, end, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
|
5297
|
+
}
|
5298
|
+
|
5299
|
+
// Look for a specific pattern of "coding" and potentially set the encoding on
|
5300
|
+
// the parser.
|
5301
|
+
static void
|
5302
|
+
parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
5303
|
+
const uint8_t *cursor = parser->current.start + 1;
|
5304
|
+
const uint8_t *end = parser->current.end;
|
5305
|
+
|
5306
|
+
bool separator = false;
|
5307
|
+
while (true) {
|
5308
|
+
if (end - cursor <= 6) return;
|
5309
|
+
switch (cursor[6]) {
|
5310
|
+
case 'C': case 'c': cursor += 6; continue;
|
5311
|
+
case 'O': case 'o': cursor += 5; continue;
|
5312
|
+
case 'D': case 'd': cursor += 4; continue;
|
5313
|
+
case 'I': case 'i': cursor += 3; continue;
|
5314
|
+
case 'N': case 'n': cursor += 2; continue;
|
5315
|
+
case 'G': case 'g': cursor += 1; continue;
|
5316
|
+
case '=': case ':':
|
5317
|
+
separator = true;
|
5318
|
+
cursor += 6;
|
5319
|
+
break;
|
5320
|
+
default:
|
5321
|
+
cursor += 6;
|
5322
|
+
if (pm_char_is_whitespace(*cursor)) break;
|
5323
|
+
continue;
|
5324
|
+
}
|
5325
|
+
if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
|
5326
|
+
separator = false;
|
5327
|
+
}
|
5328
|
+
|
5329
|
+
while (true) {
|
5330
|
+
do {
|
5331
|
+
if (++cursor >= end) return;
|
5332
|
+
} while (pm_char_is_whitespace(*cursor));
|
5333
|
+
|
5334
|
+
if (separator) break;
|
5335
|
+
if (*cursor != '=' && *cursor != ':') return;
|
5336
|
+
|
5337
|
+
separator = true;
|
5338
|
+
cursor++;
|
5339
|
+
}
|
5340
|
+
|
5341
|
+
const uint8_t *value_start = cursor;
|
5342
|
+
while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end);
|
5343
|
+
|
5344
|
+
parser_lex_magic_comment_encoding_value(parser, value_start, cursor);
|
5202
5345
|
}
|
5203
5346
|
|
5204
5347
|
// Check if this is a magic comment that includes the frozen_string_literal
|
5205
5348
|
// pragma. If it does, set that field on the parser.
|
5206
5349
|
static void
|
5207
|
-
|
5208
|
-
const uint8_t *
|
5350
|
+
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
5351
|
+
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
5352
|
+
parser->frozen_string_literal = true;
|
5353
|
+
}
|
5354
|
+
}
|
5355
|
+
|
5356
|
+
static inline bool
|
5357
|
+
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
|
5358
|
+
return b == '\'' || b == '"' || b == ':' || b == ';';
|
5359
|
+
}
|
5360
|
+
|
5361
|
+
// Find an emacs magic comment marker (-*-) within the given bounds. If one is
|
5362
|
+
// found, it returns a pointer to the start of the marker. Otherwise it returns
|
5363
|
+
// NULL.
|
5364
|
+
static inline const uint8_t *
|
5365
|
+
parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
|
5366
|
+
while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5367
|
+
if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
|
5368
|
+
return cursor;
|
5369
|
+
}
|
5370
|
+
cursor++;
|
5371
|
+
}
|
5372
|
+
return NULL;
|
5373
|
+
}
|
5374
|
+
|
5375
|
+
// Parse the current token on the parser to see if it's a magic comment and
|
5376
|
+
// potentially perform some action based on that. A regular expression that this
|
5377
|
+
// function is effectively matching is:
|
5378
|
+
//
|
5379
|
+
// %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*"
|
5380
|
+
//
|
5381
|
+
// It returns true if it consumes the entire comment. Otherwise it returns
|
5382
|
+
// false.
|
5383
|
+
static inline bool
|
5384
|
+
parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
5385
|
+
const uint8_t *start = parser->current.start + 1;
|
5209
5386
|
const uint8_t *end = parser->current.end;
|
5387
|
+
if (end - start <= 7) return false;
|
5388
|
+
|
5389
|
+
const uint8_t *cursor;
|
5390
|
+
bool indicator = false;
|
5210
5391
|
|
5211
|
-
|
5212
|
-
|
5392
|
+
if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
|
5393
|
+
start = cursor + 3;
|
5213
5394
|
|
5214
|
-
|
5395
|
+
if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
|
5396
|
+
end = cursor;
|
5397
|
+
indicator = true;
|
5398
|
+
} else {
|
5399
|
+
// If we have a start marker but not an end marker, then we cannot
|
5400
|
+
// have a magic comment.
|
5401
|
+
return false;
|
5402
|
+
}
|
5403
|
+
}
|
5215
5404
|
|
5216
|
-
|
5217
|
-
|
5218
|
-
|
5219
|
-
cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
|
5405
|
+
cursor = start;
|
5406
|
+
while (cursor < end) {
|
5407
|
+
while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
|
5220
5408
|
|
5221
|
-
|
5222
|
-
|
5223
|
-
cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
|
5409
|
+
const uint8_t *key_start = cursor;
|
5410
|
+
while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
|
5224
5411
|
|
5225
|
-
|
5226
|
-
|
5227
|
-
|
5412
|
+
const uint8_t *key_end = cursor;
|
5413
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5414
|
+
if (cursor == end) break;
|
5228
5415
|
|
5229
|
-
|
5416
|
+
if (*cursor == ':') {
|
5417
|
+
cursor++;
|
5418
|
+
} else {
|
5419
|
+
if (!indicator) return false;
|
5420
|
+
continue;
|
5421
|
+
}
|
5422
|
+
|
5423
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5424
|
+
if (cursor == end) break;
|
5425
|
+
|
5426
|
+
const uint8_t *value_start;
|
5427
|
+
const uint8_t *value_end;
|
5428
|
+
|
5429
|
+
if (*cursor == '"') {
|
5430
|
+
value_start = ++cursor;
|
5431
|
+
for (; cursor < end && *cursor != '"'; cursor++) {
|
5432
|
+
if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
|
5230
5433
|
}
|
5434
|
+
value_end = cursor;
|
5435
|
+
} else {
|
5436
|
+
value_start = cursor;
|
5437
|
+
while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
|
5438
|
+
value_end = cursor;
|
5231
5439
|
}
|
5232
5440
|
|
5233
|
-
|
5441
|
+
if (indicator) {
|
5442
|
+
while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
|
5443
|
+
} else {
|
5444
|
+
while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
|
5445
|
+
if (cursor != end) return false;
|
5446
|
+
}
|
5447
|
+
|
5448
|
+
// Here, we need to do some processing on the key to swap out dashes for
|
5449
|
+
// underscores. We only need to do this if there _is_ a dash in the key.
|
5450
|
+
pm_string_t key;
|
5451
|
+
const size_t key_length = (size_t) (key_end - key_start);
|
5452
|
+
const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, &parser->encoding);
|
5453
|
+
|
5454
|
+
if (dash == NULL) {
|
5455
|
+
pm_string_shared_init(&key, key_start, key_end);
|
5456
|
+
} else {
|
5457
|
+
size_t width = (size_t) (key_end - key_start);
|
5458
|
+
uint8_t *buffer = malloc(width);
|
5459
|
+
if (buffer == NULL) break;
|
5460
|
+
|
5461
|
+
memcpy(buffer, key_start, width);
|
5462
|
+
buffer[dash - key_start] = '_';
|
5463
|
+
|
5464
|
+
while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, &parser->encoding)) != NULL) {
|
5465
|
+
buffer[dash - key_start] = '_';
|
5466
|
+
}
|
5467
|
+
|
5468
|
+
pm_string_owned_init(&key, buffer, width);
|
5469
|
+
}
|
5470
|
+
|
5471
|
+
// Finally, we can start checking the key against the list of known
|
5472
|
+
// magic comment keys, and potentially change state based on that.
|
5473
|
+
const uint8_t *key_source = pm_string_source(&key);
|
5474
|
+
|
5475
|
+
// We only want to attempt to compare against encoding comments if it's
|
5476
|
+
// the first line in the file (or the second in the case of a shebang).
|
5477
|
+
if (parser->current.start == parser->encoding_comment_start) {
|
5478
|
+
if (
|
5479
|
+
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
|
5480
|
+
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
|
5481
|
+
) {
|
5482
|
+
parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
|
5483
|
+
}
|
5484
|
+
}
|
5485
|
+
|
5486
|
+
// We only want to handle frozen string literal comments if it's before
|
5487
|
+
// any semantic tokens have been seen.
|
5488
|
+
if (!semantic_token_seen) {
|
5489
|
+
if (key_length == 21 && pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
|
5490
|
+
parser_lex_magic_comment_frozen_string_literal_value(parser, value_start, value_end);
|
5491
|
+
}
|
5492
|
+
}
|
5493
|
+
|
5494
|
+
// When we're done, we want to free the string in case we had to
|
5495
|
+
// allocate memory for it.
|
5496
|
+
pm_string_free(&key);
|
5497
|
+
|
5498
|
+
// Allocate a new magic comment node to append to the parser's list.
|
5499
|
+
pm_magic_comment_t *magic_comment;
|
5500
|
+
if ((magic_comment = (pm_magic_comment_t *) calloc(sizeof(pm_magic_comment_t), 1)) != NULL) {
|
5501
|
+
magic_comment->key_start = key_start;
|
5502
|
+
magic_comment->value_start = value_start;
|
5503
|
+
magic_comment->key_length = (uint32_t) key_length;
|
5504
|
+
magic_comment->value_length = (uint32_t) (value_end - value_start);
|
5505
|
+
pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
|
5506
|
+
}
|
5234
5507
|
}
|
5508
|
+
|
5509
|
+
return true;
|
5235
5510
|
}
|
5236
5511
|
|
5237
5512
|
/******************************************************************************/
|
@@ -5366,7 +5641,7 @@ context_def_p(pm_parser_t *parser) {
|
|
5366
5641
|
static void
|
5367
5642
|
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
5368
5643
|
if (invalid != NULL) {
|
5369
|
-
|
5644
|
+
pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
|
5370
5645
|
}
|
5371
5646
|
}
|
5372
5647
|
|
@@ -5430,7 +5705,7 @@ lex_optional_float_suffix(pm_parser_t *parser) {
|
|
5430
5705
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
5431
5706
|
type = PM_TOKEN_FLOAT;
|
5432
5707
|
} else {
|
5433
|
-
|
5708
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
|
5434
5709
|
type = PM_TOKEN_FLOAT;
|
5435
5710
|
}
|
5436
5711
|
}
|
@@ -5451,7 +5726,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5451
5726
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
5452
5727
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
5453
5728
|
} else {
|
5454
|
-
|
5729
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
5455
5730
|
}
|
5456
5731
|
|
5457
5732
|
break;
|
@@ -5463,7 +5738,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5463
5738
|
if (pm_char_is_binary_digit(peek(parser))) {
|
5464
5739
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
5465
5740
|
} else {
|
5466
|
-
|
5741
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
5467
5742
|
}
|
5468
5743
|
|
5469
5744
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
|
@@ -5476,7 +5751,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5476
5751
|
if (pm_char_is_octal_digit(peek(parser))) {
|
5477
5752
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
5478
5753
|
} else {
|
5479
|
-
|
5754
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
5480
5755
|
}
|
5481
5756
|
|
5482
5757
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
|
@@ -5503,7 +5778,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
|
|
5503
5778
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
5504
5779
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
5505
5780
|
} else {
|
5506
|
-
|
5781
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
5507
5782
|
}
|
5508
5783
|
|
5509
5784
|
parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
|
@@ -5581,7 +5856,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
5581
5856
|
static pm_token_type_t
|
5582
5857
|
lex_global_variable(pm_parser_t *parser) {
|
5583
5858
|
if (parser->current.end >= parser->end) {
|
5584
|
-
|
5859
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5585
5860
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
5586
5861
|
}
|
5587
5862
|
|
@@ -5622,7 +5897,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
5622
5897
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
5623
5898
|
|
5624
5899
|
// $0 isn't allowed to be followed by anything.
|
5625
|
-
|
5900
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5626
5901
|
}
|
5627
5902
|
|
5628
5903
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -5653,7 +5928,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
5653
5928
|
} else {
|
5654
5929
|
// If we get here, then we have a $ followed by something that isn't
|
5655
5930
|
// recognized as a global variable.
|
5656
|
-
|
5931
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
5657
5932
|
}
|
5658
5933
|
|
5659
5934
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -5962,52 +6237,475 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
5962
6237
|
}
|
5963
6238
|
}
|
5964
6239
|
|
5965
|
-
|
5966
|
-
|
5967
|
-
|
5968
|
-
|
5969
|
-
|
5970
|
-
// \t horizontal tab, ASCII 09h (TAB)
|
5971
|
-
// \n newline (line feed), ASCII 0Ah (LF)
|
5972
|
-
// \v vertical tab, ASCII 0Bh (VT)
|
5973
|
-
// \f form feed, ASCII 0Ch (FF)
|
5974
|
-
// \r carriage return, ASCII 0Dh (CR)
|
5975
|
-
// \e escape, ASCII 1Bh (ESC)
|
5976
|
-
// \s space, ASCII 20h (SPC)
|
5977
|
-
// \\ backslash
|
5978
|
-
// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7])
|
5979
|
-
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
|
5980
|
-
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
5981
|
-
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
|
5982
|
-
// \cx or \C-x control character, where x is an ASCII printable character
|
5983
|
-
// \M-x meta character, where x is an ASCII printable character
|
5984
|
-
// \M-\C-x meta control character, where x is an ASCII printable character
|
5985
|
-
// \M-\cx same as above
|
5986
|
-
// \c\M-x same as above
|
5987
|
-
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
5988
|
-
//
|
5989
|
-
static pm_token_type_t
|
5990
|
-
lex_question_mark(pm_parser_t *parser) {
|
5991
|
-
if (lex_state_end_p(parser)) {
|
5992
|
-
lex_state_set(parser, PM_LEX_STATE_BEG);
|
5993
|
-
return PM_TOKEN_QUESTION_MARK;
|
5994
|
-
}
|
6240
|
+
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
|
6241
|
+
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
|
6242
|
+
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
|
6243
|
+
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
|
6244
|
+
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
|
5995
6245
|
|
5996
|
-
|
5997
|
-
|
5998
|
-
|
5999
|
-
|
6246
|
+
// This is a lookup table for whether or not an ASCII character is printable.
|
6247
|
+
static const bool ascii_printable_chars[] = {
|
6248
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
|
6249
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
6250
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6251
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6252
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6253
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
|
6254
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6255
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
|
6256
|
+
};
|
6000
6257
|
|
6001
|
-
|
6002
|
-
|
6003
|
-
|
6004
|
-
|
6258
|
+
static inline bool
|
6259
|
+
char_is_ascii_printable(const uint8_t b) {
|
6260
|
+
return (b < 0x80) && ascii_printable_chars[b];
|
6261
|
+
}
|
6262
|
+
|
6263
|
+
// Return the value that a hexadecimal digit character represents. For example,
|
6264
|
+
// transform 'a' into 10, 'b' into 11, etc.
|
6265
|
+
static inline uint8_t
|
6266
|
+
escape_hexadecimal_digit(const uint8_t value) {
|
6267
|
+
return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
|
6268
|
+
}
|
6269
|
+
|
6270
|
+
// Scan the given number of hexadecimal digits of a Unicode escape and return
|
6271
|
+
// the value they encode. This function assumes that the characters have already been
|
6272
|
+
// validated.
|
6273
|
+
static inline uint32_t
|
6274
|
+
escape_unicode(const uint8_t *string, size_t length) {
|
6275
|
+
uint32_t value = 0;
|
6276
|
+
for (size_t index = 0; index < length; index++) {
|
6277
|
+
if (index != 0) value <<= 4;
|
6278
|
+
value |= escape_hexadecimal_digit(string[index]);
|
6279
|
+
}
|
6280
|
+
return value;
|
6281
|
+
}
|
6282
|
+
|
6283
|
+
// Escape a single character value based on the given flags.
|
6284
|
+
static inline uint8_t
|
6285
|
+
escape_byte(uint8_t value, const uint8_t flags) {
|
6286
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
|
6287
|
+
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
6288
|
+
return value;
|
6289
|
+
}
|
6290
|
+
|
6291
|
+
// Write a unicode codepoint to the given buffer.
|
6292
|
+
static inline void
|
6293
|
+
escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
|
6294
|
+
if (value <= 0x7F) { // 0xxxxxxx
|
6295
|
+
pm_buffer_append_u8(buffer, (uint8_t) value);
|
6296
|
+
} else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
|
6297
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xC0 | (value >> 6)));
|
6298
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6299
|
+
} else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
|
6300
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xE0 | (value >> 12)));
|
6301
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
6302
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6303
|
+
} else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
6304
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0xF0 | (value >> 18)));
|
6305
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
|
6306
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
6307
|
+
pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
6308
|
+
} else {
|
6309
|
+
pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6310
|
+
pm_buffer_append_u8(buffer, 0xEF);
|
6311
|
+
pm_buffer_append_u8(buffer, 0xBF);
|
6312
|
+
pm_buffer_append_u8(buffer, 0xBD);
|
6313
|
+
}
|
6314
|
+
}
|
6315
|
+
|
6316
|
+
// The regular expression engine doesn't support the same escape sequences as
|
6317
|
+
// Ruby does. So first we have to read the escape sequence, and then we have to
|
6318
|
+
// format it like the regular expression engine expects it. For example, in Ruby
|
6319
|
+
// if we have:
|
6320
|
+
//
|
6321
|
+
// /\M-\C-?/
|
6322
|
+
//
|
6323
|
+
// then the first byte is actually 255, so we have to rewrite this as:
|
6324
|
+
//
|
6325
|
+
// /\xFF/
|
6326
|
+
//
|
6327
|
+
// Note that in this case there is a literal \ byte in the regular expression
|
6328
|
+
// source so that the regular expression engine will perform its own unescaping.
|
6329
|
+
static inline void
|
6330
|
+
escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
|
6331
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6332
|
+
pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
|
6333
|
+
|
6334
|
+
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
6335
|
+
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
6336
|
+
|
6337
|
+
if (byte1 >= 0xA) {
|
6338
|
+
pm_buffer_append_u8(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
6339
|
+
} else {
|
6340
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte1 + '0'));
|
6341
|
+
}
|
6342
|
+
|
6343
|
+
if (byte2 >= 0xA) {
|
6344
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
6345
|
+
} else {
|
6346
|
+
pm_buffer_append_u8(buffer, (uint8_t) (byte2 + '0'));
|
6347
|
+
}
|
6348
|
+
} else {
|
6349
|
+
pm_buffer_append_u8(buffer, byte);
|
6350
|
+
}
|
6351
|
+
}
|
6352
|
+
|
6353
|
+
// Read the value of an escape into the buffer.
|
6354
|
+
static void
|
6355
|
+
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
6356
|
+
switch (peek(parser)) {
|
6357
|
+
case '\\': {
|
6358
|
+
parser->current.end++;
|
6359
|
+
pm_buffer_append_u8(buffer, '\\');
|
6360
|
+
return;
|
6361
|
+
}
|
6362
|
+
case '\'': {
|
6363
|
+
parser->current.end++;
|
6364
|
+
pm_buffer_append_u8(buffer, '\'');
|
6365
|
+
return;
|
6366
|
+
}
|
6367
|
+
case 'a': {
|
6368
|
+
parser->current.end++;
|
6369
|
+
pm_buffer_append_u8(buffer, '\a');
|
6370
|
+
return;
|
6371
|
+
}
|
6372
|
+
case 'b': {
|
6373
|
+
parser->current.end++;
|
6374
|
+
pm_buffer_append_u8(buffer, '\b');
|
6375
|
+
return;
|
6376
|
+
}
|
6377
|
+
case 'e': {
|
6378
|
+
parser->current.end++;
|
6379
|
+
pm_buffer_append_u8(buffer, '\033');
|
6380
|
+
return;
|
6381
|
+
}
|
6382
|
+
case 'f': {
|
6383
|
+
parser->current.end++;
|
6384
|
+
pm_buffer_append_u8(buffer, '\f');
|
6385
|
+
return;
|
6386
|
+
}
|
6387
|
+
case 'n': {
|
6388
|
+
parser->current.end++;
|
6389
|
+
pm_buffer_append_u8(buffer, '\n');
|
6390
|
+
return;
|
6391
|
+
}
|
6392
|
+
case 'r': {
|
6393
|
+
parser->current.end++;
|
6394
|
+
pm_buffer_append_u8(buffer, '\r');
|
6395
|
+
return;
|
6396
|
+
}
|
6397
|
+
case 's': {
|
6398
|
+
parser->current.end++;
|
6399
|
+
pm_buffer_append_u8(buffer, ' ');
|
6400
|
+
return;
|
6401
|
+
}
|
6402
|
+
case 't': {
|
6403
|
+
parser->current.end++;
|
6404
|
+
pm_buffer_append_u8(buffer, '\t');
|
6405
|
+
return;
|
6406
|
+
}
|
6407
|
+
case 'v': {
|
6408
|
+
parser->current.end++;
|
6409
|
+
pm_buffer_append_u8(buffer, '\v');
|
6410
|
+
return;
|
6411
|
+
}
|
6412
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
6413
|
+
uint8_t value = (uint8_t) (*parser->current.end - '0');
|
6414
|
+
parser->current.end++;
|
6415
|
+
|
6416
|
+
if (pm_char_is_octal_digit(peek(parser))) {
|
6417
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
|
6418
|
+
parser->current.end++;
|
6419
|
+
|
6420
|
+
if (pm_char_is_octal_digit(peek(parser))) {
|
6421
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
|
6422
|
+
parser->current.end++;
|
6423
|
+
}
|
6424
|
+
}
|
6425
|
+
|
6426
|
+
pm_buffer_append_u8(buffer, value);
|
6427
|
+
return;
|
6428
|
+
}
|
6429
|
+
case 'x': {
|
6430
|
+
const uint8_t *start = parser->current.end - 1;
|
6431
|
+
|
6432
|
+
parser->current.end++;
|
6433
|
+
uint8_t byte = peek(parser);
|
6434
|
+
|
6435
|
+
if (pm_char_is_hexadecimal_digit(byte)) {
|
6436
|
+
uint8_t value = escape_hexadecimal_digit(byte);
|
6437
|
+
parser->current.end++;
|
6438
|
+
|
6439
|
+
byte = peek(parser);
|
6440
|
+
if (pm_char_is_hexadecimal_digit(byte)) {
|
6441
|
+
value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
|
6442
|
+
parser->current.end++;
|
6443
|
+
}
|
6444
|
+
|
6445
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6446
|
+
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
|
6447
|
+
} else {
|
6448
|
+
pm_buffer_append_u8(buffer, value);
|
6449
|
+
}
|
6450
|
+
} else {
|
6451
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
6452
|
+
}
|
6453
|
+
|
6454
|
+
return;
|
6455
|
+
}
|
6456
|
+
case 'u': {
|
6457
|
+
const uint8_t *start = parser->current.end - 1;
|
6458
|
+
parser->current.end++;
|
6459
|
+
|
6460
|
+
if (
|
6461
|
+
(parser->current.end + 4 <= parser->end) &&
|
6462
|
+
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
6463
|
+
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
6464
|
+
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
6465
|
+
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
6466
|
+
) {
|
6467
|
+
uint32_t value = escape_unicode(parser->current.end, 4);
|
6468
|
+
|
6469
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6470
|
+
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
|
6471
|
+
} else {
|
6472
|
+
escape_write_unicode(parser, buffer, start, parser->current.end + 4, value);
|
6473
|
+
}
|
6474
|
+
|
6475
|
+
parser->current.end += 4;
|
6476
|
+
} else if (peek(parser) == '{') {
|
6477
|
+
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
6478
|
+
|
6479
|
+
parser->current.end++;
|
6480
|
+
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
6481
|
+
|
6482
|
+
const uint8_t *extra_codepoints_start = NULL;
|
6483
|
+
int codepoints_count = 0;
|
6484
|
+
|
6485
|
+
while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
|
6486
|
+
const uint8_t *unicode_start = parser->current.end;
|
6487
|
+
size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
|
6488
|
+
|
6489
|
+
if (hexadecimal_length > 6) {
|
6490
|
+
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
6491
|
+
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
6492
|
+
} else if (hexadecimal_length == 0) {
|
6493
|
+
// there are no hexadecimal characters
|
6494
|
+
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6495
|
+
return;
|
6496
|
+
}
|
6497
|
+
|
6498
|
+
parser->current.end += hexadecimal_length;
|
6499
|
+
codepoints_count++;
|
6500
|
+
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
|
6501
|
+
extra_codepoints_start = unicode_start;
|
6502
|
+
}
|
6503
|
+
|
6504
|
+
if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
|
6505
|
+
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
6506
|
+
escape_write_unicode(parser, buffer, unicode_start, parser->current.end, value);
|
6507
|
+
}
|
6508
|
+
|
6509
|
+
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
6510
|
+
}
|
6511
|
+
|
6512
|
+
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
6513
|
+
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
|
6514
|
+
pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
|
6515
|
+
}
|
6516
|
+
|
6517
|
+
if (peek(parser) == '}') {
|
6518
|
+
parser->current.end++;
|
6519
|
+
} else {
|
6520
|
+
pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
6521
|
+
}
|
6522
|
+
|
6523
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
6524
|
+
pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
6525
|
+
}
|
6526
|
+
} else {
|
6527
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
6528
|
+
}
|
6529
|
+
|
6530
|
+
return;
|
6531
|
+
}
|
6532
|
+
case 'c': {
|
6533
|
+
parser->current.end++;
|
6534
|
+
if (parser->current.end == parser->end) {
|
6535
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6536
|
+
return;
|
6537
|
+
}
|
6538
|
+
|
6539
|
+
uint8_t peeked = peek(parser);
|
6540
|
+
switch (peeked) {
|
6541
|
+
case '?': {
|
6542
|
+
parser->current.end++;
|
6543
|
+
escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
|
6544
|
+
return;
|
6545
|
+
}
|
6546
|
+
case '\\':
|
6547
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
6548
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
6549
|
+
return;
|
6550
|
+
}
|
6551
|
+
parser->current.end++;
|
6552
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
6553
|
+
return;
|
6554
|
+
default: {
|
6555
|
+
if (!char_is_ascii_printable(peeked)) {
|
6556
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6557
|
+
return;
|
6558
|
+
}
|
6559
|
+
|
6560
|
+
parser->current.end++;
|
6561
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
|
6562
|
+
return;
|
6563
|
+
}
|
6564
|
+
}
|
6565
|
+
}
|
6566
|
+
case 'C': {
|
6567
|
+
parser->current.end++;
|
6568
|
+
if (peek(parser) != '-') {
|
6569
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6570
|
+
return;
|
6571
|
+
}
|
6572
|
+
|
6573
|
+
parser->current.end++;
|
6574
|
+
if (parser->current.end == parser->end) {
|
6575
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6576
|
+
return;
|
6577
|
+
}
|
6578
|
+
|
6579
|
+
uint8_t peeked = peek(parser);
|
6580
|
+
switch (peeked) {
|
6581
|
+
case '?': {
|
6582
|
+
parser->current.end++;
|
6583
|
+
escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
|
6584
|
+
return;
|
6585
|
+
}
|
6586
|
+
case '\\':
|
6587
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
6588
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
6589
|
+
return;
|
6590
|
+
}
|
6591
|
+
parser->current.end++;
|
6592
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
6593
|
+
return;
|
6594
|
+
default: {
|
6595
|
+
if (!char_is_ascii_printable(peeked)) {
|
6596
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
6597
|
+
return;
|
6598
|
+
}
|
6599
|
+
|
6600
|
+
parser->current.end++;
|
6601
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
|
6602
|
+
return;
|
6603
|
+
}
|
6604
|
+
}
|
6605
|
+
}
|
6606
|
+
case 'M': {
|
6607
|
+
parser->current.end++;
|
6608
|
+
if (peek(parser) != '-') {
|
6609
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6610
|
+
return;
|
6611
|
+
}
|
6612
|
+
|
6613
|
+
parser->current.end++;
|
6614
|
+
if (parser->current.end == parser->end) {
|
6615
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6616
|
+
return;
|
6617
|
+
}
|
6618
|
+
|
6619
|
+
uint8_t peeked = peek(parser);
|
6620
|
+
if (peeked == '\\') {
|
6621
|
+
if (flags & PM_ESCAPE_FLAG_META) {
|
6622
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
6623
|
+
return;
|
6624
|
+
}
|
6625
|
+
parser->current.end++;
|
6626
|
+
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
|
6627
|
+
return;
|
6628
|
+
}
|
6629
|
+
|
6630
|
+
if (!char_is_ascii_printable(peeked)) {
|
6631
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
|
6632
|
+
return;
|
6633
|
+
}
|
6634
|
+
|
6635
|
+
parser->current.end++;
|
6636
|
+
escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
|
6637
|
+
return;
|
6638
|
+
}
|
6639
|
+
case '\r': {
|
6640
|
+
if (peek_offset(parser, 1) == '\n') {
|
6641
|
+
parser->current.end += 2;
|
6642
|
+
pm_buffer_append_u8(buffer, '\n');
|
6643
|
+
return;
|
6644
|
+
}
|
6645
|
+
}
|
6646
|
+
/* fallthrough */
|
6647
|
+
default: {
|
6648
|
+
if (parser->current.end < parser->end) {
|
6649
|
+
pm_buffer_append_u8(buffer, *parser->current.end++);
|
6650
|
+
}
|
6651
|
+
return;
|
6652
|
+
}
|
6653
|
+
}
|
6654
|
+
}
|
6655
|
+
|
6656
|
+
// This function is responsible for lexing either a character literal or the ?
|
6657
|
+
// operator. The supported character literals are described below.
|
6658
|
+
//
|
6659
|
+
// \a bell, ASCII 07h (BEL)
|
6660
|
+
// \b backspace, ASCII 08h (BS)
|
6661
|
+
// \t horizontal tab, ASCII 09h (TAB)
|
6662
|
+
// \n newline (line feed), ASCII 0Ah (LF)
|
6663
|
+
// \v vertical tab, ASCII 0Bh (VT)
|
6664
|
+
// \f form feed, ASCII 0Ch (FF)
|
6665
|
+
// \r carriage return, ASCII 0Dh (CR)
|
6666
|
+
// \e escape, ASCII 1Bh (ESC)
|
6667
|
+
// \s space, ASCII 20h (SPC)
|
6668
|
+
// \\ backslash
|
6669
|
+
// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7])
|
6670
|
+
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
|
6671
|
+
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
6672
|
+
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
|
6673
|
+
// \cx or \C-x control character, where x is an ASCII printable character
|
6674
|
+
// \M-x meta character, where x is an ASCII printable character
|
6675
|
+
// \M-\C-x meta control character, where x is an ASCII printable character
|
6676
|
+
// \M-\cx same as above
|
6677
|
+
// \c\M-x same as above
|
6678
|
+
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
6679
|
+
//
|
6680
|
+
static pm_token_type_t
|
6681
|
+
lex_question_mark(pm_parser_t *parser) {
|
6682
|
+
if (lex_state_end_p(parser)) {
|
6683
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6684
|
+
return PM_TOKEN_QUESTION_MARK;
|
6685
|
+
}
|
6686
|
+
|
6687
|
+
if (parser->current.end >= parser->end) {
|
6688
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
|
6689
|
+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
|
6690
|
+
return PM_TOKEN_CHARACTER_LITERAL;
|
6691
|
+
}
|
6692
|
+
|
6693
|
+
if (pm_char_is_whitespace(*parser->current.end)) {
|
6694
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6695
|
+
return PM_TOKEN_QUESTION_MARK;
|
6696
|
+
}
|
6005
6697
|
|
6006
6698
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
6007
6699
|
|
6008
|
-
if (parser
|
6700
|
+
if (match(parser, '\\')) {
|
6009
6701
|
lex_state_set(parser, PM_LEX_STATE_END);
|
6010
|
-
|
6702
|
+
|
6703
|
+
pm_buffer_t buffer;
|
6704
|
+
pm_buffer_init_capacity(&buffer, 3);
|
6705
|
+
|
6706
|
+
escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
|
6707
|
+
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
|
6708
|
+
|
6011
6709
|
return PM_TOKEN_CHARACTER_LITERAL;
|
6012
6710
|
} else {
|
6013
6711
|
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -6024,6 +6722,7 @@ lex_question_mark(pm_parser_t *parser) {
|
|
6024
6722
|
) {
|
6025
6723
|
lex_state_set(parser, PM_LEX_STATE_END);
|
6026
6724
|
parser->current.end += encoding_width;
|
6725
|
+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
|
6027
6726
|
return PM_TOKEN_CHARACTER_LITERAL;
|
6028
6727
|
}
|
6029
6728
|
}
|
@@ -6045,9 +6744,9 @@ lex_at_variable(pm_parser_t *parser) {
|
|
6045
6744
|
parser->current.end += width;
|
6046
6745
|
}
|
6047
6746
|
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
6048
|
-
|
6747
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
6049
6748
|
} else {
|
6050
|
-
|
6749
|
+
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
|
6051
6750
|
}
|
6052
6751
|
|
6053
6752
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -6070,7 +6769,7 @@ parser_lex_callback(pm_parser_t *parser) {
|
|
6070
6769
|
// Return a new comment node of the specified type.
|
6071
6770
|
static inline pm_comment_t *
|
6072
6771
|
parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
|
6073
|
-
pm_comment_t *comment = (pm_comment_t *)
|
6772
|
+
pm_comment_t *comment = (pm_comment_t *) calloc(sizeof(pm_comment_t), 1);
|
6074
6773
|
if (comment == NULL) return NULL;
|
6075
6774
|
|
6076
6775
|
*comment = (pm_comment_t) {
|
@@ -6146,7 +6845,7 @@ lex_embdoc(pm_parser_t *parser) {
|
|
6146
6845
|
parser_lex_callback(parser);
|
6147
6846
|
}
|
6148
6847
|
|
6149
|
-
|
6848
|
+
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
|
6150
6849
|
|
6151
6850
|
comment->end = parser->current.end;
|
6152
6851
|
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
|
@@ -6177,6 +6876,113 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
|
|
6177
6876
|
parser->heredoc_end = NULL;
|
6178
6877
|
}
|
6179
6878
|
|
6879
|
+
// When we're lexing certain types (strings, symbols, lists, etc.) we have
|
6880
|
+
// string content associated with the tokens. For example:
|
6881
|
+
//
|
6882
|
+
// "foo"
|
6883
|
+
//
|
6884
|
+
// In this case, the string content is foo. Since there is no escaping, there's
|
6885
|
+
// no need to track additional information and the token can be returned as
|
6886
|
+
// normal. However, if we have escape sequences:
|
6887
|
+
//
|
6888
|
+
// "foo\n"
|
6889
|
+
//
|
6890
|
+
// then the bytes in the string are "f", "o", "o", "\", "n", but we want to
|
6891
|
+
// provide out consumers with the string content "f", "o", "o", "\n". In these
|
6892
|
+
// cases, when we find the first escape sequence, we initialize a pm_buffer_t
|
6893
|
+
// to keep track of the string content. Then in the parser, it will
|
6894
|
+
// automatically attach the string content to the node that it belongs to.
|
6895
|
+
typedef struct {
|
6896
|
+
pm_buffer_t buffer;
|
6897
|
+
const uint8_t *cursor;
|
6898
|
+
} pm_token_buffer_t;
|
6899
|
+
|
6900
|
+
// Push the given byte into the token buffer.
|
6901
|
+
static inline void
|
6902
|
+
pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
6903
|
+
pm_buffer_append_u8(&token_buffer->buffer, byte);
|
6904
|
+
}
|
6905
|
+
|
6906
|
+
// When we're about to return from lexing the current token and we know for sure
|
6907
|
+
// that we have found an escape sequence, this function is called to copy the
|
6908
|
+
// contents of the token buffer into the current string on the parser so that it
|
6909
|
+
// can be attached to the correct node.
|
6910
|
+
static inline void
|
6911
|
+
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6912
|
+
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
|
6913
|
+
}
|
6914
|
+
|
6915
|
+
// When we're about to return from lexing the current token, we need to flush
|
6916
|
+
// all of the content that we have pushed into the buffer into the current
|
6917
|
+
// string. If we haven't pushed anything into the buffer, this means that we
|
6918
|
+
// never found an escape sequence, so we can directly reference the bounds of
|
6919
|
+
// the current string. Either way, at the return of this function it is expected
|
6920
|
+
// that parser->current_string is established in such a way that it can be
|
6921
|
+
// attached to a node.
|
6922
|
+
static void
|
6923
|
+
pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6924
|
+
if (token_buffer->cursor == NULL) {
|
6925
|
+
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
|
6926
|
+
} else {
|
6927
|
+
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
|
6928
|
+
pm_token_buffer_copy(parser, token_buffer);
|
6929
|
+
}
|
6930
|
+
}
|
6931
|
+
|
6932
|
+
// When we've found an escape sequence, we need to copy everything up to this
|
6933
|
+
// point into the buffer because we're about to provide a string that has
|
6934
|
+
// different content than a direct slice of the source.
|
6935
|
+
//
|
6936
|
+
// It is expected that the parser's current token end will be pointing at one
|
6937
|
+
// byte past the backslash that starts the escape sequence.
|
6938
|
+
static void
|
6939
|
+
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
6940
|
+
const uint8_t *start;
|
6941
|
+
if (token_buffer->cursor == NULL) {
|
6942
|
+
pm_buffer_init_capacity(&token_buffer->buffer, 16);
|
6943
|
+
start = parser->current.start;
|
6944
|
+
} else {
|
6945
|
+
start = token_buffer->cursor;
|
6946
|
+
}
|
6947
|
+
|
6948
|
+
const uint8_t *end = parser->current.end - 1;
|
6949
|
+
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
6950
|
+
}
|
6951
|
+
|
6952
|
+
// Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
|
6953
|
+
// a tilde heredoc expands out tab characters to the nearest tab boundaries.
|
6954
|
+
static inline size_t
|
6955
|
+
pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
|
6956
|
+
size_t whitespace = 0;
|
6957
|
+
|
6958
|
+
switch (indent) {
|
6959
|
+
case PM_HEREDOC_INDENT_NONE:
|
6960
|
+
// Do nothing, we can't match a terminator with
|
6961
|
+
// indentation and there's no need to calculate common
|
6962
|
+
// whitespace.
|
6963
|
+
break;
|
6964
|
+
case PM_HEREDOC_INDENT_DASH:
|
6965
|
+
// Skip past inline whitespace.
|
6966
|
+
*cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
|
6967
|
+
break;
|
6968
|
+
case PM_HEREDOC_INDENT_TILDE:
|
6969
|
+
// Skip past inline whitespace and calculate common
|
6970
|
+
// whitespace.
|
6971
|
+
while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
|
6972
|
+
if (**cursor == '\t') {
|
6973
|
+
whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
6974
|
+
} else {
|
6975
|
+
whitespace++;
|
6976
|
+
}
|
6977
|
+
(*cursor)++;
|
6978
|
+
}
|
6979
|
+
|
6980
|
+
break;
|
6981
|
+
}
|
6982
|
+
|
6983
|
+
return whitespace;
|
6984
|
+
}
|
6985
|
+
|
6180
6986
|
// This is a convenience macro that will set the current token type, call the
|
6181
6987
|
// lex callback, and then return from the parser_lex function.
|
6182
6988
|
#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
|
@@ -6301,12 +7107,16 @@ parser_lex(pm_parser_t *parser) {
|
|
6301
7107
|
parser->current.type = PM_TOKEN_COMMENT;
|
6302
7108
|
parser_lex_callback(parser);
|
6303
7109
|
|
6304
|
-
if
|
6305
|
-
|
6306
|
-
|
7110
|
+
// Here, parse the comment to see if it's a magic comment
|
7111
|
+
// and potentially change state on the parser.
|
7112
|
+
if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
|
7113
|
+
ptrdiff_t length = parser->current.end - parser->current.start;
|
6307
7114
|
|
6308
|
-
|
6309
|
-
|
7115
|
+
// If we didn't find a magic comment within the first
|
7116
|
+
// pass and we're at the start of the file, then we need
|
7117
|
+
// to do another pass to potentially find other patterns
|
7118
|
+
// for encoding comments.
|
7119
|
+
if (length >= 10) parser_lex_magic_comment_encoding(parser);
|
6310
7120
|
}
|
6311
7121
|
|
6312
7122
|
lexed_comment = true;
|
@@ -6588,7 +7398,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6588
7398
|
pm_token_type_t type = PM_TOKEN_STAR;
|
6589
7399
|
|
6590
7400
|
if (lex_state_spcarg_p(parser, space_seen)) {
|
6591
|
-
|
7401
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
|
6592
7402
|
type = PM_TOKEN_USTAR;
|
6593
7403
|
} else if (lex_state_beg_p(parser)) {
|
6594
7404
|
type = PM_TOKEN_USTAR;
|
@@ -6626,7 +7436,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6626
7436
|
|
6627
7437
|
// = => =~ == === =begin
|
6628
7438
|
case '=':
|
6629
|
-
if (current_token_starts_line(parser) &&
|
7439
|
+
if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
|
6630
7440
|
pm_token_type_t type = lex_embdoc(parser);
|
6631
7441
|
|
6632
7442
|
if (type == PM_TOKEN_EOF) {
|
@@ -6720,7 +7530,8 @@ parser_lex(pm_parser_t *parser) {
|
|
6720
7530
|
.ident_length = ident_length,
|
6721
7531
|
.next_start = parser->current.end,
|
6722
7532
|
.quote = quote,
|
6723
|
-
.indent = indent
|
7533
|
+
.indent = indent,
|
7534
|
+
.common_whitespace = (size_t) -1
|
6724
7535
|
}
|
6725
7536
|
});
|
6726
7537
|
|
@@ -6732,7 +7543,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6732
7543
|
// this is not a valid heredoc declaration. In this case we
|
6733
7544
|
// will add an error, but we will still return a heredoc
|
6734
7545
|
// start.
|
6735
|
-
|
7546
|
+
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
|
6736
7547
|
body_start = parser->end;
|
6737
7548
|
} else {
|
6738
7549
|
// Otherwise, we want to indicate that the body of the
|
@@ -6925,12 +7736,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6925
7736
|
|
6926
7737
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
6927
7738
|
if (spcarg) {
|
6928
|
-
|
6929
|
-
&parser->warning_list,
|
6930
|
-
parser->current.start,
|
6931
|
-
parser->current.end,
|
6932
|
-
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS
|
6933
|
-
);
|
7739
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
|
6934
7740
|
}
|
6935
7741
|
|
6936
7742
|
if (lex_state_beg_p(parser) || spcarg) {
|
@@ -6974,12 +7780,7 @@ parser_lex(pm_parser_t *parser) {
|
|
6974
7780
|
|
6975
7781
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
6976
7782
|
if (spcarg) {
|
6977
|
-
|
6978
|
-
&parser->warning_list,
|
6979
|
-
parser->current.start,
|
6980
|
-
parser->current.end,
|
6981
|
-
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS
|
6982
|
-
);
|
7783
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
6983
7784
|
}
|
6984
7785
|
|
6985
7786
|
if (lex_state_beg_p(parser) || spcarg) {
|
@@ -7076,7 +7877,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7076
7877
|
}
|
7077
7878
|
|
7078
7879
|
if (lex_state_spcarg_p(parser, space_seen)) {
|
7079
|
-
|
7880
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
|
7080
7881
|
lex_mode_push_regexp(parser, '\0', '/');
|
7081
7882
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
7082
7883
|
}
|
@@ -7116,7 +7917,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7116
7917
|
// operator because we don't want to move into the string
|
7117
7918
|
// lex mode unnecessarily.
|
7118
7919
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
7119
|
-
|
7920
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7120
7921
|
LEX(PM_TOKEN_PERCENT);
|
7121
7922
|
}
|
7122
7923
|
|
@@ -7149,7 +7950,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7149
7950
|
// validate that here.
|
7150
7951
|
uint8_t delimiter = peek_offset(parser, 1);
|
7151
7952
|
if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
|
7152
|
-
|
7953
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7153
7954
|
goto lex_next_token;
|
7154
7955
|
}
|
7155
7956
|
|
@@ -7249,7 +8050,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7249
8050
|
// unparseable. In this case we'll just drop it from the parser
|
7250
8051
|
// and skip past it and hope that the next token is something
|
7251
8052
|
// that we can parse.
|
7252
|
-
|
8053
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
7253
8054
|
goto lex_next_token;
|
7254
8055
|
}
|
7255
8056
|
}
|
@@ -7285,7 +8086,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7285
8086
|
// token as we've exhausted all of the other options. We'll skip past
|
7286
8087
|
// it and return the next token.
|
7287
8088
|
if (!width) {
|
7288
|
-
|
8089
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
|
7289
8090
|
goto lex_next_token;
|
7290
8091
|
}
|
7291
8092
|
|
@@ -7351,7 +8152,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7351
8152
|
}
|
7352
8153
|
}
|
7353
8154
|
}
|
7354
|
-
case PM_LEX_LIST:
|
8155
|
+
case PM_LEX_LIST: {
|
7355
8156
|
if (parser->next_start != NULL) {
|
7356
8157
|
parser->current.end = parser->next_start;
|
7357
8158
|
parser->next_start = NULL;
|
@@ -7394,6 +8195,10 @@ parser_lex(pm_parser_t *parser) {
|
|
7394
8195
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
7395
8196
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7396
8197
|
|
8198
|
+
// If we haven't found an escape yet, then this buffer will be
|
8199
|
+
// unallocated since we can refer directly to the source string.
|
8200
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8201
|
+
|
7397
8202
|
while (breakpoint != NULL) {
|
7398
8203
|
// If we hit a null byte, skip directly past it.
|
7399
8204
|
if (*breakpoint == '\0') {
|
@@ -7405,16 +8210,18 @@ parser_lex(pm_parser_t *parser) {
|
|
7405
8210
|
// now, so we can return an element of the list.
|
7406
8211
|
if (pm_char_is_whitespace(*breakpoint)) {
|
7407
8212
|
parser->current.end = breakpoint;
|
8213
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7408
8214
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7409
8215
|
}
|
7410
8216
|
|
7411
|
-
//If we hit the terminator, we need to check which token to
|
8217
|
+
// If we hit the terminator, we need to check which token to
|
7412
8218
|
// return.
|
7413
8219
|
if (*breakpoint == lex_mode->as.list.terminator) {
|
7414
8220
|
// If this terminator doesn't actually close the list, then
|
7415
8221
|
// we need to continue on past it.
|
7416
8222
|
if (lex_mode->as.list.nesting > 0) {
|
7417
|
-
|
8223
|
+
parser->current.end = breakpoint + 1;
|
8224
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7418
8225
|
lex_mode->as.list.nesting--;
|
7419
8226
|
continue;
|
7420
8227
|
}
|
@@ -7423,6 +8230,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7423
8230
|
// past content, then we can return a list node.
|
7424
8231
|
if (breakpoint > parser->current.start) {
|
7425
8232
|
parser->current.end = breakpoint;
|
8233
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7426
8234
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7427
8235
|
}
|
7428
8236
|
|
@@ -7438,59 +8246,109 @@ parser_lex(pm_parser_t *parser) {
|
|
7438
8246
|
// literally. In this case we'll skip past the next character
|
7439
8247
|
// and find the next breakpoint.
|
7440
8248
|
if (*breakpoint == '\\') {
|
7441
|
-
|
7442
|
-
|
7443
|
-
|
7444
|
-
|
8249
|
+
parser->current.end = breakpoint + 1;
|
8250
|
+
|
8251
|
+
// If we've hit the end of the file, then break out of the
|
8252
|
+
// loop by setting the breakpoint to NULL.
|
8253
|
+
if (parser->current.end == parser->end) {
|
7445
8254
|
breakpoint = NULL;
|
7446
8255
|
continue;
|
7447
8256
|
}
|
7448
8257
|
|
7449
|
-
|
7450
|
-
|
7451
|
-
|
7452
|
-
|
7453
|
-
|
7454
|
-
|
7455
|
-
|
7456
|
-
|
7457
|
-
|
7458
|
-
|
7459
|
-
|
7460
|
-
|
8258
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8259
|
+
uint8_t peeked = peek(parser);
|
8260
|
+
|
8261
|
+
switch (peeked) {
|
8262
|
+
case ' ':
|
8263
|
+
case '\f':
|
8264
|
+
case '\t':
|
8265
|
+
case '\v':
|
8266
|
+
case '\\':
|
8267
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8268
|
+
parser->current.end++;
|
8269
|
+
break;
|
8270
|
+
case '\r':
|
8271
|
+
parser->current.end++;
|
8272
|
+
if (peek(parser) != '\n') {
|
8273
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8274
|
+
break;
|
8275
|
+
}
|
8276
|
+
/* fallthrough */
|
8277
|
+
case '\n':
|
8278
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8279
|
+
|
8280
|
+
if (parser->heredoc_end) {
|
8281
|
+
// ... if we are on the same line as a heredoc,
|
8282
|
+
// flush the heredoc and continue parsing after
|
8283
|
+
// heredoc_end.
|
8284
|
+
parser_flush_heredoc_end(parser);
|
8285
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8286
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8287
|
+
} else {
|
8288
|
+
// ... else track the newline.
|
8289
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8290
|
+
}
|
8291
|
+
|
8292
|
+
parser->current.end++;
|
8293
|
+
break;
|
8294
|
+
default:
|
8295
|
+
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
8296
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8297
|
+
parser->current.end++;
|
8298
|
+
} else if (lex_mode->as.list.interpolation) {
|
8299
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8300
|
+
} else {
|
8301
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8302
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8303
|
+
parser->current.end++;
|
8304
|
+
}
|
8305
|
+
|
8306
|
+
break;
|
7461
8307
|
}
|
7462
8308
|
|
7463
|
-
|
8309
|
+
token_buffer.cursor = parser->current.end;
|
8310
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7464
8311
|
continue;
|
7465
8312
|
}
|
7466
8313
|
|
7467
8314
|
// If we hit a #, then we will attempt to lex interpolation.
|
7468
8315
|
if (*breakpoint == '#') {
|
7469
8316
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7470
|
-
|
7471
|
-
|
8317
|
+
|
8318
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8319
|
+
// If we haven't returned at this point then we had something
|
8320
|
+
// that looked like an interpolated class or instance variable
|
8321
|
+
// like "#@" but wasn't actually. In this case we'll just skip
|
8322
|
+
// to the next breakpoint.
|
8323
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8324
|
+
continue;
|
7472
8325
|
}
|
7473
8326
|
|
7474
|
-
|
7475
|
-
|
7476
|
-
|
7477
|
-
|
7478
|
-
|
7479
|
-
continue;
|
8327
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8328
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8329
|
+
}
|
8330
|
+
|
8331
|
+
LEX(type);
|
7480
8332
|
}
|
7481
8333
|
|
7482
8334
|
// If we've hit the incrementor, then we need to skip past it
|
7483
8335
|
// and find the next breakpoint.
|
7484
8336
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
7485
|
-
|
8337
|
+
parser->current.end = breakpoint + 1;
|
8338
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7486
8339
|
lex_mode->as.list.nesting++;
|
7487
8340
|
continue;
|
7488
8341
|
}
|
7489
8342
|
|
7490
|
-
|
7491
|
-
|
7492
|
-
|
8343
|
+
if (parser->current.end > parser->current.start) {
|
8344
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8345
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8346
|
+
}
|
7493
8347
|
|
8348
|
+
// If we were unable to find a breakpoint, then this token hits the
|
8349
|
+
// end of the file.
|
8350
|
+
LEX(PM_TOKEN_EOF);
|
8351
|
+
}
|
7494
8352
|
case PM_LEX_REGEXP: {
|
7495
8353
|
// First, we'll set to start of this token to be the current end.
|
7496
8354
|
if (parser->next_start == NULL) {
|
@@ -7515,11 +8373,13 @@ parser_lex(pm_parser_t *parser) {
|
|
7515
8373
|
// characters.
|
7516
8374
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
7517
8375
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8376
|
+
pm_token_buffer_t token_buffer = { 0 };
|
7518
8377
|
|
7519
8378
|
while (breakpoint != NULL) {
|
7520
8379
|
// If we hit a null byte, skip directly past it.
|
7521
8380
|
if (*breakpoint == '\0') {
|
7522
|
-
|
8381
|
+
parser->current.end = breakpoint + 1;
|
8382
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7523
8383
|
continue;
|
7524
8384
|
}
|
7525
8385
|
|
@@ -7540,7 +8400,8 @@ parser_lex(pm_parser_t *parser) {
|
|
7540
8400
|
if (lex_mode->as.regexp.terminator != '\n') {
|
7541
8401
|
// If the terminator is not a newline, then we can set
|
7542
8402
|
// the next breakpoint and continue.
|
7543
|
-
|
8403
|
+
parser->current.end = breakpoint + 1;
|
8404
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7544
8405
|
continue;
|
7545
8406
|
}
|
7546
8407
|
}
|
@@ -7549,7 +8410,8 @@ parser_lex(pm_parser_t *parser) {
|
|
7549
8410
|
// token to return.
|
7550
8411
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
7551
8412
|
if (lex_mode->as.regexp.nesting > 0) {
|
7552
|
-
|
8413
|
+
parser->current.end = breakpoint + 1;
|
8414
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7553
8415
|
lex_mode->as.regexp.nesting--;
|
7554
8416
|
continue;
|
7555
8417
|
}
|
@@ -7559,11 +8421,12 @@ parser_lex(pm_parser_t *parser) {
|
|
7559
8421
|
// first.
|
7560
8422
|
if (breakpoint > parser->current.start) {
|
7561
8423
|
parser->current.end = breakpoint;
|
8424
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7562
8425
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7563
8426
|
}
|
7564
8427
|
|
7565
|
-
// Since we've hit the terminator of the regular expression,
|
7566
|
-
// need to parse the options.
|
8428
|
+
// Since we've hit the terminator of the regular expression,
|
8429
|
+
// we now need to parse the options.
|
7567
8430
|
parser->current.end = breakpoint + 1;
|
7568
8431
|
parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
|
7569
8432
|
|
@@ -7576,56 +8439,103 @@ parser_lex(pm_parser_t *parser) {
|
|
7576
8439
|
// literally. In this case we'll skip past the next character
|
7577
8440
|
// and find the next breakpoint.
|
7578
8441
|
if (*breakpoint == '\\') {
|
7579
|
-
|
7580
|
-
|
7581
|
-
|
8442
|
+
parser->current.end = breakpoint + 1;
|
8443
|
+
|
8444
|
+
// If we've hit the end of the file, then break out of the
|
8445
|
+
// loop by setting the breakpoint to NULL.
|
8446
|
+
if (parser->current.end == parser->end) {
|
7582
8447
|
breakpoint = NULL;
|
7583
8448
|
continue;
|
7584
8449
|
}
|
7585
8450
|
|
7586
|
-
|
7587
|
-
|
7588
|
-
|
7589
|
-
|
7590
|
-
|
7591
|
-
parser->current.end
|
7592
|
-
|
7593
|
-
|
7594
|
-
|
7595
|
-
|
7596
|
-
|
7597
|
-
|
8451
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8452
|
+
uint8_t peeked = peek(parser);
|
8453
|
+
|
8454
|
+
switch (peeked) {
|
8455
|
+
case '\r':
|
8456
|
+
parser->current.end++;
|
8457
|
+
if (peek(parser) != '\n') {
|
8458
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8459
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8460
|
+
break;
|
8461
|
+
}
|
8462
|
+
/* fallthrough */
|
8463
|
+
case '\n':
|
8464
|
+
if (parser->heredoc_end) {
|
8465
|
+
// ... if we are on the same line as a heredoc,
|
8466
|
+
// flush the heredoc and continue parsing after
|
8467
|
+
// heredoc_end.
|
8468
|
+
parser_flush_heredoc_end(parser);
|
8469
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8470
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8471
|
+
} else {
|
8472
|
+
// ... else track the newline.
|
8473
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8474
|
+
}
|
8475
|
+
|
8476
|
+
parser->current.end++;
|
8477
|
+
break;
|
8478
|
+
case 'c':
|
8479
|
+
case 'C':
|
8480
|
+
case 'M':
|
8481
|
+
case 'u':
|
8482
|
+
case 'x':
|
8483
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
|
8484
|
+
break;
|
8485
|
+
default:
|
8486
|
+
if (lex_mode->as.regexp.terminator == '/' && peeked == '/') {
|
8487
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8488
|
+
parser->current.end++;
|
8489
|
+
break;
|
8490
|
+
}
|
8491
|
+
|
8492
|
+
if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
|
8493
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8494
|
+
parser->current.end++;
|
8495
|
+
break;
|
7598
8496
|
}
|
7599
8497
|
|
7600
|
-
|
8498
|
+
token_buffer.cursor = parser->current.end;
|
8499
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7601
8500
|
continue;
|
7602
8501
|
}
|
7603
8502
|
|
7604
8503
|
// If we hit a #, then we will attempt to lex interpolation.
|
7605
8504
|
if (*breakpoint == '#') {
|
7606
8505
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7607
|
-
|
7608
|
-
|
8506
|
+
|
8507
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8508
|
+
// If we haven't returned at this point then we had
|
8509
|
+
// something that looked like an interpolated class or
|
8510
|
+
// instance variable like "#@" but wasn't actually. In
|
8511
|
+
// this case we'll just skip to the next breakpoint.
|
8512
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8513
|
+
continue;
|
7609
8514
|
}
|
7610
8515
|
|
7611
|
-
|
7612
|
-
|
7613
|
-
|
7614
|
-
|
7615
|
-
|
7616
|
-
continue;
|
8516
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8517
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8518
|
+
}
|
8519
|
+
|
8520
|
+
LEX(type);
|
7617
8521
|
}
|
7618
8522
|
|
7619
8523
|
// If we've hit the incrementor, then we need to skip past it
|
7620
8524
|
// and find the next breakpoint.
|
7621
8525
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
7622
|
-
|
8526
|
+
parser->current.end = breakpoint + 1;
|
8527
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7623
8528
|
lex_mode->as.regexp.nesting++;
|
7624
8529
|
continue;
|
7625
8530
|
}
|
7626
8531
|
|
7627
|
-
|
7628
|
-
|
8532
|
+
if (parser->current.end > parser->current.start) {
|
8533
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8534
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8535
|
+
}
|
8536
|
+
|
8537
|
+
// If we were unable to find a breakpoint, then this token hits the
|
8538
|
+
// end of the file.
|
7629
8539
|
LEX(PM_TOKEN_EOF);
|
7630
8540
|
}
|
7631
8541
|
case PM_LEX_STRING: {
|
@@ -7646,30 +8556,34 @@ parser_lex(pm_parser_t *parser) {
|
|
7646
8556
|
|
7647
8557
|
// These are the places where we need to split up the content of the
|
7648
8558
|
// string. We'll use strpbrk to find the first of these characters.
|
7649
|
-
|
8559
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
8560
|
+
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
7650
8561
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7651
8562
|
|
8563
|
+
// If we haven't found an escape yet, then this buffer will be
|
8564
|
+
// unallocated since we can refer directly to the source string.
|
8565
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8566
|
+
|
7652
8567
|
while (breakpoint != NULL) {
|
7653
8568
|
// If we hit the incrementor, then we'll increment then nesting and
|
7654
8569
|
// continue lexing.
|
7655
|
-
if (
|
7656
|
-
|
7657
|
-
|
7658
|
-
|
7659
|
-
parser->lex_modes.current->as.string.nesting++;
|
7660
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
8570
|
+
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
8571
|
+
lex_mode->as.string.nesting++;
|
8572
|
+
parser->current.end = breakpoint + 1;
|
8573
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7661
8574
|
continue;
|
7662
8575
|
}
|
7663
8576
|
|
7664
8577
|
// Note that we have to check the terminator here first because we could
|
7665
8578
|
// potentially be parsing a % string that has a # character as the
|
7666
8579
|
// terminator.
|
7667
|
-
if (*breakpoint ==
|
8580
|
+
if (*breakpoint == lex_mode->as.string.terminator) {
|
7668
8581
|
// If this terminator doesn't actually close the string, then we need
|
7669
8582
|
// to continue on past it.
|
7670
|
-
if (
|
7671
|
-
|
7672
|
-
parser->
|
8583
|
+
if (lex_mode->as.string.nesting > 0) {
|
8584
|
+
parser->current.end = breakpoint + 1;
|
8585
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8586
|
+
lex_mode->as.string.nesting--;
|
7673
8587
|
continue;
|
7674
8588
|
}
|
7675
8589
|
|
@@ -7677,6 +8591,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7677
8591
|
// then we need to return that content as string content first.
|
7678
8592
|
if (breakpoint > parser->current.start) {
|
7679
8593
|
parser->current.end = breakpoint;
|
8594
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7680
8595
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7681
8596
|
}
|
7682
8597
|
|
@@ -7690,11 +8605,7 @@ parser_lex(pm_parser_t *parser) {
|
|
7690
8605
|
parser->current.end = breakpoint + 1;
|
7691
8606
|
}
|
7692
8607
|
|
7693
|
-
if (
|
7694
|
-
parser->lex_modes.current->as.string.label_allowed &&
|
7695
|
-
(peek(parser) == ':') &&
|
7696
|
-
(peek_offset(parser, 1) != ':')
|
7697
|
-
) {
|
8608
|
+
if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
|
7698
8609
|
parser->current.end++;
|
7699
8610
|
lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
7700
8611
|
lex_mode_pop(parser);
|
@@ -7712,11 +8623,13 @@ parser_lex(pm_parser_t *parser) {
|
|
7712
8623
|
if (*breakpoint == '\n') {
|
7713
8624
|
if (parser->heredoc_end == NULL) {
|
7714
8625
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
7715
|
-
|
8626
|
+
parser->current.end = breakpoint + 1;
|
8627
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7716
8628
|
continue;
|
7717
8629
|
} else {
|
7718
8630
|
parser->current.end = breakpoint + 1;
|
7719
8631
|
parser_flush_heredoc_end(parser);
|
8632
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7720
8633
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7721
8634
|
}
|
7722
8635
|
}
|
@@ -7724,58 +8637,110 @@ parser_lex(pm_parser_t *parser) {
|
|
7724
8637
|
switch (*breakpoint) {
|
7725
8638
|
case '\0':
|
7726
8639
|
// Skip directly past the null character.
|
7727
|
-
|
8640
|
+
parser->current.end = breakpoint + 1;
|
8641
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7728
8642
|
break;
|
7729
8643
|
case '\\': {
|
7730
|
-
//
|
7731
|
-
|
7732
|
-
|
7733
|
-
|
7734
|
-
|
7735
|
-
if (
|
7736
|
-
// we're at the end of the file
|
8644
|
+
// Here we hit escapes.
|
8645
|
+
parser->current.end = breakpoint + 1;
|
8646
|
+
|
8647
|
+
// If we've hit the end of the file, then break out of
|
8648
|
+
// the loop by setting the breakpoint to NULL.
|
8649
|
+
if (parser->current.end == parser->end) {
|
7737
8650
|
breakpoint = NULL;
|
7738
|
-
|
8651
|
+
continue;
|
7739
8652
|
}
|
7740
8653
|
|
7741
|
-
|
7742
|
-
|
7743
|
-
|
7744
|
-
|
7745
|
-
|
7746
|
-
|
7747
|
-
|
7748
|
-
|
7749
|
-
|
7750
|
-
|
7751
|
-
|
7752
|
-
|
8654
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8655
|
+
uint8_t peeked = peek(parser);
|
8656
|
+
|
8657
|
+
switch (peeked) {
|
8658
|
+
case '\\':
|
8659
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8660
|
+
parser->current.end++;
|
8661
|
+
break;
|
8662
|
+
case '\r':
|
8663
|
+
parser->current.end++;
|
8664
|
+
if (peek(parser) != '\n') {
|
8665
|
+
if (!lex_mode->as.string.interpolation) {
|
8666
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8667
|
+
}
|
8668
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8669
|
+
break;
|
8670
|
+
}
|
8671
|
+
/* fallthrough */
|
8672
|
+
case '\n':
|
8673
|
+
if (!lex_mode->as.string.interpolation) {
|
8674
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8675
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8676
|
+
}
|
8677
|
+
|
8678
|
+
if (parser->heredoc_end) {
|
8679
|
+
// ... if we are on the same line as a heredoc,
|
8680
|
+
// flush the heredoc and continue parsing after
|
8681
|
+
// heredoc_end.
|
8682
|
+
parser_flush_heredoc_end(parser);
|
8683
|
+
pm_token_buffer_copy(parser, &token_buffer);
|
8684
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8685
|
+
} else {
|
8686
|
+
// ... else track the newline.
|
8687
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end);
|
8688
|
+
}
|
8689
|
+
|
8690
|
+
parser->current.end++;
|
8691
|
+
break;
|
8692
|
+
default:
|
8693
|
+
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
8694
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8695
|
+
parser->current.end++;
|
8696
|
+
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
8697
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8698
|
+
parser->current.end++;
|
8699
|
+
} else if (lex_mode->as.string.interpolation) {
|
8700
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8701
|
+
} else {
|
8702
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8703
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8704
|
+
parser->current.end++;
|
8705
|
+
}
|
8706
|
+
|
8707
|
+
break;
|
7753
8708
|
}
|
7754
8709
|
|
7755
|
-
|
8710
|
+
token_buffer.cursor = parser->current.end;
|
8711
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7756
8712
|
break;
|
7757
8713
|
}
|
7758
8714
|
case '#': {
|
7759
8715
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7760
|
-
|
7761
|
-
|
8716
|
+
|
8717
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8718
|
+
// If we haven't returned at this point then we had something that
|
8719
|
+
// looked like an interpolated class or instance variable like "#@"
|
8720
|
+
// but wasn't actually. In this case we'll just skip to the next
|
8721
|
+
// breakpoint.
|
8722
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8723
|
+
break;
|
7762
8724
|
}
|
7763
8725
|
|
7764
|
-
|
7765
|
-
|
7766
|
-
|
7767
|
-
|
7768
|
-
|
7769
|
-
break;
|
8726
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8727
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8728
|
+
}
|
8729
|
+
|
8730
|
+
LEX(type);
|
7770
8731
|
}
|
7771
8732
|
default:
|
7772
8733
|
assert(false && "unreachable");
|
7773
8734
|
}
|
7774
8735
|
}
|
7775
8736
|
|
8737
|
+
if (parser->current.end > parser->current.start) {
|
8738
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8739
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8740
|
+
}
|
8741
|
+
|
7776
8742
|
// If we've hit the end of the string, then this is an unterminated
|
7777
8743
|
// string. In that case we'll return the EOF token.
|
7778
|
-
parser->current.end = parser->end;
|
7779
8744
|
LEX(PM_TOKEN_EOF);
|
7780
8745
|
}
|
7781
8746
|
case PM_LEX_HEREDOC: {
|
@@ -7797,16 +8762,15 @@ parser_lex(pm_parser_t *parser) {
|
|
7797
8762
|
|
7798
8763
|
// Now let's grab the information about the identifier off of the current
|
7799
8764
|
// lex mode.
|
7800
|
-
|
7801
|
-
|
8765
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
8766
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
8767
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
7802
8768
|
|
7803
8769
|
// If we are immediately following a newline and we have hit the
|
7804
8770
|
// terminator, then we need to return the ending of the heredoc.
|
7805
8771
|
if (current_token_starts_line(parser)) {
|
7806
8772
|
const uint8_t *start = parser->current.start;
|
7807
|
-
|
7808
|
-
start += pm_strspn_inline_whitespace(start, parser->end - start);
|
7809
|
-
}
|
8773
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
7810
8774
|
|
7811
8775
|
if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
|
7812
8776
|
bool matched = true;
|
@@ -7824,10 +8788,10 @@ parser_lex(pm_parser_t *parser) {
|
|
7824
8788
|
}
|
7825
8789
|
|
7826
8790
|
if (matched) {
|
7827
|
-
if (*
|
8791
|
+
if (*lex_mode->as.heredoc.next_start == '\\') {
|
7828
8792
|
parser->next_start = NULL;
|
7829
8793
|
} else {
|
7830
|
-
parser->next_start =
|
8794
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
7831
8795
|
parser->heredoc_end = parser->current.end;
|
7832
8796
|
}
|
7833
8797
|
|
@@ -7838,61 +8802,91 @@ parser_lex(pm_parser_t *parser) {
|
|
7838
8802
|
LEX(PM_TOKEN_HEREDOC_END);
|
7839
8803
|
}
|
7840
8804
|
}
|
8805
|
+
|
8806
|
+
if (
|
8807
|
+
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
|
8808
|
+
(lex_mode->as.heredoc.common_whitespace > whitespace) &&
|
8809
|
+
peek_at(parser, start) != '\n'
|
8810
|
+
) {
|
8811
|
+
lex_mode->as.heredoc.common_whitespace = whitespace;
|
8812
|
+
}
|
7841
8813
|
}
|
7842
8814
|
|
7843
|
-
// Otherwise we'll be parsing string content. These are the places
|
7844
|
-
// we need to split up the content of the heredoc. We'll use
|
7845
|
-
// find the first of these characters.
|
8815
|
+
// Otherwise we'll be parsing string content. These are the places
|
8816
|
+
// where we need to split up the content of the heredoc. We'll use
|
8817
|
+
// strpbrk to find the first of these characters.
|
7846
8818
|
uint8_t breakpoints[] = "\n\\#";
|
7847
8819
|
|
7848
|
-
pm_heredoc_quote_t quote =
|
8820
|
+
pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
|
7849
8821
|
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
7850
8822
|
breakpoints[2] = '\0';
|
7851
8823
|
}
|
7852
8824
|
|
7853
8825
|
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8826
|
+
pm_token_buffer_t token_buffer = { 0 };
|
8827
|
+
bool was_escaped_newline = false;
|
7854
8828
|
|
7855
8829
|
while (breakpoint != NULL) {
|
7856
8830
|
switch (*breakpoint) {
|
7857
8831
|
case '\0':
|
7858
8832
|
// Skip directly past the null character.
|
7859
|
-
|
8833
|
+
parser->current.end = breakpoint + 1;
|
8834
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7860
8835
|
break;
|
7861
8836
|
case '\n': {
|
7862
8837
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
7863
8838
|
parser_flush_heredoc_end(parser);
|
7864
8839
|
parser->current.end = breakpoint + 1;
|
8840
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7865
8841
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7866
8842
|
}
|
7867
8843
|
|
7868
8844
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
7869
8845
|
|
8846
|
+
// If we have a - or ~ heredoc, then we can match after
|
8847
|
+
// some leading whitespace.
|
7870
8848
|
const uint8_t *start = breakpoint + 1;
|
7871
|
-
|
7872
|
-
start += pm_strspn_inline_whitespace(start, parser->end - start);
|
7873
|
-
}
|
8849
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
7874
8850
|
|
7875
|
-
// If we have hit a newline that is followed by a valid
|
7876
|
-
// then we need to return the content of the
|
7877
|
-
// content. Then, the next time a
|
7878
|
-
// again and return the
|
8851
|
+
// If we have hit a newline that is followed by a valid
|
8852
|
+
// terminator, then we need to return the content of the
|
8853
|
+
// heredoc here as string content. Then, the next time a
|
8854
|
+
// token is lexed, it will match again and return the
|
8855
|
+
// end of the heredoc.
|
7879
8856
|
if (
|
8857
|
+
!was_escaped_newline &&
|
7880
8858
|
(start + ident_length <= parser->end) &&
|
7881
8859
|
(memcmp(start, ident_start, ident_length) == 0)
|
7882
8860
|
) {
|
7883
|
-
// Heredoc terminators must be followed by a
|
8861
|
+
// Heredoc terminators must be followed by a
|
8862
|
+
// newline, CRLF, or EOF to be valid.
|
7884
8863
|
if (
|
7885
8864
|
start + ident_length == parser->end ||
|
7886
8865
|
match_eol_at(parser, start + ident_length)
|
7887
8866
|
) {
|
7888
8867
|
parser->current.end = breakpoint + 1;
|
8868
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8869
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
8870
|
+
}
|
8871
|
+
}
|
8872
|
+
|
8873
|
+
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
8874
|
+
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
8875
|
+
lex_mode->as.heredoc.common_whitespace = whitespace;
|
8876
|
+
}
|
8877
|
+
|
8878
|
+
parser->current.end = breakpoint + 1;
|
8879
|
+
|
8880
|
+
if (!was_escaped_newline) {
|
8881
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
7889
8882
|
LEX(PM_TOKEN_STRING_CONTENT);
|
7890
8883
|
}
|
7891
8884
|
}
|
7892
8885
|
|
7893
|
-
// Otherwise we hit a newline and it wasn't followed by
|
7894
|
-
// terminator, so we can continue parsing.
|
7895
|
-
|
8886
|
+
// Otherwise we hit a newline and it wasn't followed by
|
8887
|
+
// a terminator, so we can continue parsing.
|
8888
|
+
parser->current.end = breakpoint + 1;
|
8889
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7896
8890
|
break;
|
7897
8891
|
}
|
7898
8892
|
case '\\': {
|
@@ -7902,46 +8896,98 @@ parser_lex(pm_parser_t *parser) {
|
|
7902
8896
|
// stop looping before the newline and not after the
|
7903
8897
|
// newline so that we can still potentially find the
|
7904
8898
|
// terminator of the heredoc.
|
7905
|
-
|
7906
|
-
|
7907
|
-
|
7908
|
-
|
7909
|
-
|
7910
|
-
|
7911
|
-
|
7912
|
-
|
7913
|
-
breakpoint = NULL;
|
7914
|
-
break;
|
7915
|
-
}
|
8899
|
+
parser->current.end = breakpoint + 1;
|
8900
|
+
|
8901
|
+
// If we've hit the end of the file, then break out of
|
8902
|
+
// the loop by setting the breakpoint to NULL.
|
8903
|
+
if (parser->current.end == parser->end) {
|
8904
|
+
breakpoint = NULL;
|
8905
|
+
continue;
|
8906
|
+
}
|
7916
8907
|
|
7917
|
-
|
8908
|
+
pm_token_buffer_escape(parser, &token_buffer);
|
8909
|
+
uint8_t peeked = peek(parser);
|
7918
8910
|
|
7919
|
-
|
8911
|
+
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
8912
|
+
switch (peeked) {
|
8913
|
+
case '\r':
|
8914
|
+
parser->current.end++;
|
8915
|
+
if (peek(parser) != '\n') {
|
8916
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8917
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8918
|
+
break;
|
8919
|
+
}
|
8920
|
+
/* fallthrough */
|
8921
|
+
case '\n':
|
8922
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8923
|
+
pm_token_buffer_push(&token_buffer, '\n');
|
8924
|
+
token_buffer.cursor = parser->current.end + 1;
|
8925
|
+
breakpoint = parser->current.end;
|
8926
|
+
continue;
|
8927
|
+
default:
|
8928
|
+
parser->current.end++;
|
8929
|
+
pm_token_buffer_push(&token_buffer, '\\');
|
8930
|
+
pm_token_buffer_push(&token_buffer, peeked);
|
8931
|
+
break;
|
8932
|
+
}
|
8933
|
+
} else {
|
8934
|
+
switch (peeked) {
|
8935
|
+
case '\r':
|
8936
|
+
parser->current.end++;
|
8937
|
+
if (peek(parser) != '\n') {
|
8938
|
+
pm_token_buffer_push(&token_buffer, '\r');
|
8939
|
+
break;
|
8940
|
+
}
|
8941
|
+
/* fallthrough */
|
8942
|
+
case '\n':
|
8943
|
+
was_escaped_newline = true;
|
8944
|
+
token_buffer.cursor = parser->current.end + 1;
|
8945
|
+
breakpoint = parser->current.end;
|
8946
|
+
continue;
|
8947
|
+
default:
|
8948
|
+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
8949
|
+
break;
|
8950
|
+
}
|
7920
8951
|
}
|
7921
8952
|
|
8953
|
+
token_buffer.cursor = parser->current.end;
|
8954
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7922
8955
|
break;
|
7923
8956
|
}
|
7924
8957
|
case '#': {
|
7925
8958
|
pm_token_type_t type = lex_interpolation(parser, breakpoint);
|
7926
|
-
|
7927
|
-
|
8959
|
+
|
8960
|
+
if (type == PM_TOKEN_NOT_PROVIDED) {
|
8961
|
+
// If we haven't returned at this point then we had
|
8962
|
+
// something that looked like an interpolated class
|
8963
|
+
// or instance variable like "#@" but wasn't
|
8964
|
+
// actually. In this case we'll just skip to the
|
8965
|
+
// next breakpoint.
|
8966
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
8967
|
+
break;
|
7928
8968
|
}
|
7929
8969
|
|
7930
|
-
|
7931
|
-
|
7932
|
-
|
7933
|
-
|
7934
|
-
|
7935
|
-
break;
|
8970
|
+
if (type == PM_TOKEN_STRING_CONTENT) {
|
8971
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8972
|
+
}
|
8973
|
+
|
8974
|
+
LEX(type);
|
7936
8975
|
}
|
7937
8976
|
default:
|
7938
8977
|
assert(false && "unreachable");
|
7939
8978
|
}
|
8979
|
+
|
8980
|
+
was_escaped_newline = false;
|
8981
|
+
}
|
8982
|
+
|
8983
|
+
if (parser->current.end > parser->current.start) {
|
8984
|
+
parser->current.end = parser->end;
|
8985
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
8986
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
7940
8987
|
}
|
7941
8988
|
|
7942
8989
|
// If we've hit the end of the string, then this is an unterminated
|
7943
8990
|
// heredoc. In that case we'll return the EOF token.
|
7944
|
-
parser->current.end = parser->end;
|
7945
8991
|
LEX(PM_TOKEN_EOF);
|
7946
8992
|
}
|
7947
8993
|
}
|
@@ -7955,67 +9001,6 @@ parser_lex(pm_parser_t *parser) {
|
|
7955
9001
|
/* Parse functions */
|
7956
9002
|
/******************************************************************************/
|
7957
9003
|
|
7958
|
-
// When we are parsing certain content, we need to unescape the content to
|
7959
|
-
// provide to the consumers of the parser. The following functions accept a range
|
7960
|
-
// of characters from the source and unescapes into the provided type.
|
7961
|
-
//
|
7962
|
-
// We have functions for unescaping regular expression nodes, string nodes,
|
7963
|
-
// symbol nodes, and xstring nodes
|
7964
|
-
static pm_regular_expression_node_t *
|
7965
|
-
pm_regular_expression_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7966
|
-
pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, opening, content, closing);
|
7967
|
-
|
7968
|
-
assert((content->end - content->start) >= 0);
|
7969
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7970
|
-
|
7971
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
7972
|
-
return node;
|
7973
|
-
}
|
7974
|
-
|
7975
|
-
static pm_symbol_node_t *
|
7976
|
-
pm_symbol_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7977
|
-
pm_symbol_node_t *node = pm_symbol_node_create(parser, opening, content, closing);
|
7978
|
-
|
7979
|
-
assert((content->end - content->start) >= 0);
|
7980
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7981
|
-
|
7982
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
7983
|
-
return node;
|
7984
|
-
}
|
7985
|
-
|
7986
|
-
static pm_string_node_t *
|
7987
|
-
pm_char_literal_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7988
|
-
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
|
7989
|
-
|
7990
|
-
assert((content->end - content->start) >= 0);
|
7991
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
7992
|
-
|
7993
|
-
pm_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
|
7994
|
-
return node;
|
7995
|
-
}
|
7996
|
-
|
7997
|
-
static pm_string_node_t *
|
7998
|
-
pm_string_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
|
7999
|
-
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
|
8000
|
-
|
8001
|
-
assert((content->end - content->start) >= 0);
|
8002
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
8003
|
-
|
8004
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
8005
|
-
return node;
|
8006
|
-
}
|
8007
|
-
|
8008
|
-
static pm_x_string_node_t *
|
8009
|
-
pm_xstring_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
|
8010
|
-
pm_x_string_node_t *node = pm_xstring_node_create(parser, opening, content, closing);
|
8011
|
-
|
8012
|
-
assert((content->end - content->start) >= 0);
|
8013
|
-
pm_string_shared_init(&node->unescaped, content->start, content->end);
|
8014
|
-
|
8015
|
-
pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
|
8016
|
-
return node;
|
8017
|
-
}
|
8018
|
-
|
8019
9004
|
// These are the various precedence rules. Because we are using a Pratt parser,
|
8020
9005
|
// they are named binding power to represent the manner in which nodes are bound
|
8021
9006
|
// together in the stack.
|
@@ -8269,7 +9254,7 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
|
|
8269
9254
|
if (accept1(parser, type)) return;
|
8270
9255
|
|
8271
9256
|
const uint8_t *location = parser->previous.end;
|
8272
|
-
|
9257
|
+
pm_parser_err(parser, location, location, diag_id);
|
8273
9258
|
|
8274
9259
|
parser->previous.start = location;
|
8275
9260
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8282,7 +9267,7 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
|
|
8282
9267
|
if (accept2(parser, type1, type2)) return;
|
8283
9268
|
|
8284
9269
|
const uint8_t *location = parser->previous.end;
|
8285
|
-
|
9270
|
+
pm_parser_err(parser, location, location, diag_id);
|
8286
9271
|
|
8287
9272
|
parser->previous.start = location;
|
8288
9273
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8294,7 +9279,7 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
8294
9279
|
if (accept3(parser, type1, type2, type3)) return;
|
8295
9280
|
|
8296
9281
|
const uint8_t *location = parser->previous.end;
|
8297
|
-
|
9282
|
+
pm_parser_err(parser, location, location, diag_id);
|
8298
9283
|
|
8299
9284
|
parser->previous.start = location;
|
8300
9285
|
parser->previous.type = PM_TOKEN_MISSING;
|
@@ -8389,23 +9374,23 @@ parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power,
|
|
8389
9374
|
}
|
8390
9375
|
|
8391
9376
|
// Convert the name of a method into the corresponding write method name. For
|
8392
|
-
//
|
9377
|
+
// example, foo would be turned into foo=.
|
8393
9378
|
static void
|
8394
|
-
parse_write_name(
|
9379
|
+
parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
8395
9380
|
// The method name needs to change. If we previously had
|
8396
9381
|
// foo, we now need foo=. In this case we'll allocate a new
|
8397
9382
|
// owned string, copy the previous method name in, and
|
8398
9383
|
// append an =.
|
8399
|
-
|
9384
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
|
9385
|
+
size_t length = constant->length;
|
8400
9386
|
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
8401
9387
|
if (name == NULL) return;
|
8402
9388
|
|
8403
|
-
memcpy(name,
|
9389
|
+
memcpy(name, constant->start, length);
|
8404
9390
|
name[length] = '=';
|
8405
9391
|
|
8406
9392
|
// Now switch the name to the new string.
|
8407
|
-
|
8408
|
-
pm_string_owned_init(string, name, length + 1);
|
9393
|
+
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
8409
9394
|
}
|
8410
9395
|
|
8411
9396
|
// Convert the given node into a valid target node.
|
@@ -8428,7 +9413,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8428
9413
|
return target;
|
8429
9414
|
case PM_BACK_REFERENCE_READ_NODE:
|
8430
9415
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
8431
|
-
|
9416
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
8432
9417
|
return target;
|
8433
9418
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
8434
9419
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -8436,7 +9421,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8436
9421
|
return target;
|
8437
9422
|
case PM_LOCAL_VARIABLE_READ_NODE:
|
8438
9423
|
if (token_is_numbered_parameter(target->location.start, target->location.end)) {
|
8439
|
-
|
9424
|
+
pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8440
9425
|
} else {
|
8441
9426
|
assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
|
8442
9427
|
target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
|
@@ -8489,21 +9474,23 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8489
9474
|
pm_parser_local_add_location(parser, message.start, message.end);
|
8490
9475
|
pm_node_destroy(parser, target);
|
8491
9476
|
|
9477
|
+
uint32_t depth = 0;
|
9478
|
+
for (pm_scope_t *scope = parser->current_scope; scope && scope->transparent; depth++, scope = scope->previous);
|
8492
9479
|
const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
|
8493
|
-
target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name,
|
9480
|
+
target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
|
8494
9481
|
|
8495
9482
|
assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
|
8496
9483
|
target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
|
8497
9484
|
|
8498
9485
|
if (token_is_numbered_parameter(message.start, message.end)) {
|
8499
|
-
|
9486
|
+
pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8500
9487
|
}
|
8501
9488
|
|
8502
9489
|
return target;
|
8503
9490
|
}
|
8504
9491
|
|
8505
9492
|
if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
8506
|
-
parse_write_name(&call->name);
|
9493
|
+
parse_write_name(parser, &call->name);
|
8507
9494
|
return (pm_node_t *) call;
|
8508
9495
|
}
|
8509
9496
|
}
|
@@ -8518,9 +9505,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8518
9505
|
(call->message_loc.end[-1] == ']') &&
|
8519
9506
|
(call->block == NULL)
|
8520
9507
|
) {
|
8521
|
-
//
|
8522
|
-
|
8523
|
-
pm_string_constant_init(&call->name, "[]=", 3);
|
9508
|
+
// Replace the name with "[]=".
|
9509
|
+
call->name = pm_parser_constant_id_static(parser, "[]=", 3);
|
8524
9510
|
return target;
|
8525
9511
|
}
|
8526
9512
|
}
|
@@ -8529,7 +9515,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
8529
9515
|
// In this case we have a node that we don't know how to convert
|
8530
9516
|
// into a target. We need to treat it as an error. For now, we'll
|
8531
9517
|
// mark it as an error and just skip right past it.
|
8532
|
-
|
9518
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8533
9519
|
return target;
|
8534
9520
|
}
|
8535
9521
|
}
|
@@ -8542,7 +9528,7 @@ parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
|
8542
9528
|
|
8543
9529
|
// Ensure that we have either an = or a ) after the targets.
|
8544
9530
|
if (!match3(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_KEYWORD_IN)) {
|
8545
|
-
|
9531
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8546
9532
|
}
|
8547
9533
|
|
8548
9534
|
return result;
|
@@ -8568,7 +9554,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8568
9554
|
}
|
8569
9555
|
case PM_BACK_REFERENCE_READ_NODE:
|
8570
9556
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
8571
|
-
|
9557
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
8572
9558
|
/* fallthrough */
|
8573
9559
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
8574
9560
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -8577,7 +9563,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8577
9563
|
}
|
8578
9564
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
8579
9565
|
if (token_is_numbered_parameter(target->location.start, target->location.end)) {
|
8580
|
-
|
9566
|
+
pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8581
9567
|
}
|
8582
9568
|
|
8583
9569
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
@@ -8642,7 +9628,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8642
9628
|
target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
|
8643
9629
|
|
8644
9630
|
if (token_is_numbered_parameter(message.start, message.end)) {
|
8645
|
-
|
9631
|
+
pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
8646
9632
|
}
|
8647
9633
|
|
8648
9634
|
return target;
|
@@ -8665,7 +9651,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8665
9651
|
pm_arguments_node_arguments_append(arguments, value);
|
8666
9652
|
call->base.location.end = arguments->base.location.end;
|
8667
9653
|
|
8668
|
-
parse_write_name(&call->name);
|
9654
|
+
parse_write_name(parser, &call->name);
|
8669
9655
|
return (pm_node_t *) call;
|
8670
9656
|
}
|
8671
9657
|
}
|
@@ -8686,9 +9672,8 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8686
9672
|
pm_arguments_node_arguments_append(call->arguments, value);
|
8687
9673
|
target->location.end = value->location.end;
|
8688
9674
|
|
8689
|
-
//
|
8690
|
-
|
8691
|
-
pm_string_constant_init(&call->name, "[]=", 3);
|
9675
|
+
// Replace the name with "[]=".
|
9676
|
+
call->name = pm_parser_constant_id_static(parser, "[]=", 3);
|
8692
9677
|
return target;
|
8693
9678
|
}
|
8694
9679
|
|
@@ -8704,7 +9689,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
8704
9689
|
// In this case we have a node that we don't know how to convert into a
|
8705
9690
|
// target. We need to treat it as an error. For now, we'll mark it as an
|
8706
9691
|
// error and just skip right past it.
|
8707
|
-
|
9692
|
+
pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8708
9693
|
return target;
|
8709
9694
|
}
|
8710
9695
|
}
|
@@ -8730,7 +9715,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
8730
9715
|
// anonymous. It can be the final target or be in the middle if
|
8731
9716
|
// there haven't been any others yet.
|
8732
9717
|
if (has_splat) {
|
8733
|
-
|
9718
|
+
pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
8734
9719
|
}
|
8735
9720
|
|
8736
9721
|
pm_token_t star_operator = parser->previous;
|
@@ -8770,7 +9755,7 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
8770
9755
|
|
8771
9756
|
// Ensure that we have either an = or a ) after the targets.
|
8772
9757
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
8773
|
-
|
9758
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
8774
9759
|
}
|
8775
9760
|
|
8776
9761
|
return result;
|
@@ -8863,7 +9848,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
8863
9848
|
if (token_begins_expression_p(parser->current.type)) {
|
8864
9849
|
value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
8865
9850
|
} else if (pm_parser_local_depth(parser, &operator) == -1) {
|
8866
|
-
|
9851
|
+
pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
8867
9852
|
}
|
8868
9853
|
|
8869
9854
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -8970,7 +9955,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
8970
9955
|
|
8971
9956
|
while (!match1(parser, PM_TOKEN_EOF)) {
|
8972
9957
|
if (parsed_block_argument) {
|
8973
|
-
|
9958
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
|
8974
9959
|
}
|
8975
9960
|
|
8976
9961
|
pm_node_t *argument = NULL;
|
@@ -8979,7 +9964,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
8979
9964
|
case PM_TOKEN_USTAR_STAR:
|
8980
9965
|
case PM_TOKEN_LABEL: {
|
8981
9966
|
if (parsed_bare_hash) {
|
8982
|
-
|
9967
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
8983
9968
|
}
|
8984
9969
|
|
8985
9970
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -9001,7 +9986,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9001
9986
|
if (token_begins_expression_p(parser->current.type)) {
|
9002
9987
|
expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
|
9003
9988
|
} else if (pm_parser_local_depth(parser, &operator) == -1) {
|
9004
|
-
|
9989
|
+
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
9005
9990
|
}
|
9006
9991
|
|
9007
9992
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -9020,7 +10005,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9020
10005
|
|
9021
10006
|
if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA)) {
|
9022
10007
|
if (pm_parser_local_depth(parser, &parser->previous) == -1) {
|
9023
|
-
|
10008
|
+
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
9024
10009
|
}
|
9025
10010
|
|
9026
10011
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
@@ -9028,7 +10013,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9028
10013
|
pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
9029
10014
|
|
9030
10015
|
if (parsed_bare_hash) {
|
9031
|
-
|
10016
|
+
pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
|
9032
10017
|
}
|
9033
10018
|
|
9034
10019
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
|
@@ -9049,7 +10034,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9049
10034
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
9050
10035
|
} else {
|
9051
10036
|
if (pm_parser_local_depth(parser, &parser->previous) == -1) {
|
9052
|
-
|
10037
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
9053
10038
|
}
|
9054
10039
|
|
9055
10040
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
@@ -9066,7 +10051,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
9066
10051
|
|
9067
10052
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
9068
10053
|
if (parsed_bare_hash) {
|
9069
|
-
|
10054
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
9070
10055
|
}
|
9071
10056
|
|
9072
10057
|
pm_token_t operator;
|
@@ -9145,7 +10130,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9145
10130
|
|
9146
10131
|
if (node->parameters.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
9147
10132
|
if (parsed_splat) {
|
9148
|
-
|
10133
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
|
9149
10134
|
}
|
9150
10135
|
|
9151
10136
|
param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
|
@@ -9157,7 +10142,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9157
10142
|
param = (pm_node_t *) parse_required_destructured_parameter(parser);
|
9158
10143
|
} else if (accept1(parser, PM_TOKEN_USTAR)) {
|
9159
10144
|
if (parsed_splat) {
|
9160
|
-
|
10145
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
|
9161
10146
|
}
|
9162
10147
|
|
9163
10148
|
pm_token_t star = parser->previous;
|
@@ -9166,6 +10151,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9166
10151
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
9167
10152
|
pm_token_t name = parser->previous;
|
9168
10153
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
10154
|
+
pm_parser_parameter_name_check(parser, &name);
|
9169
10155
|
pm_parser_local_add_token(parser, &name);
|
9170
10156
|
}
|
9171
10157
|
|
@@ -9176,6 +10162,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
9176
10162
|
pm_token_t name = parser->previous;
|
9177
10163
|
|
9178
10164
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
10165
|
+
pm_parser_parameter_name_check(parser, &name);
|
9179
10166
|
pm_parser_local_add_token(parser, &name);
|
9180
10167
|
}
|
9181
10168
|
|
@@ -9237,12 +10224,12 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
|
|
9237
10224
|
}
|
9238
10225
|
|
9239
10226
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
9240
|
-
|
10227
|
+
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
9241
10228
|
}
|
9242
10229
|
|
9243
10230
|
if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
9244
10231
|
// We know what transition we failed on, so we can provide a better error here.
|
9245
|
-
|
10232
|
+
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
9246
10233
|
} else if (state < *current) {
|
9247
10234
|
*current = state;
|
9248
10235
|
}
|
@@ -9297,7 +10284,7 @@ parse_parameters(
|
|
9297
10284
|
if (params->block == NULL) {
|
9298
10285
|
pm_parameters_node_block_set(params, param);
|
9299
10286
|
} else {
|
9300
|
-
|
10287
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
|
9301
10288
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9302
10289
|
}
|
9303
10290
|
|
@@ -9305,7 +10292,7 @@ parse_parameters(
|
|
9305
10292
|
}
|
9306
10293
|
case PM_TOKEN_UDOT_DOT_DOT: {
|
9307
10294
|
if (!allows_forwarding_parameter) {
|
9308
|
-
|
10295
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
9309
10296
|
}
|
9310
10297
|
if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
|
9311
10298
|
update_parameter_state(parser, &parser->current, &order);
|
@@ -9318,7 +10305,7 @@ parse_parameters(
|
|
9318
10305
|
// forwarding parameter and move the keyword rest parameter to the posts list.
|
9319
10306
|
pm_node_t *keyword_rest = params->keyword_rest;
|
9320
10307
|
pm_parameters_node_posts_append(params, keyword_rest);
|
9321
|
-
|
10308
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
9322
10309
|
params->keyword_rest = NULL;
|
9323
10310
|
}
|
9324
10311
|
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
@@ -9337,19 +10324,19 @@ parse_parameters(
|
|
9337
10324
|
parser_lex(parser);
|
9338
10325
|
switch (parser->previous.type) {
|
9339
10326
|
case PM_TOKEN_CONSTANT:
|
9340
|
-
|
10327
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
9341
10328
|
break;
|
9342
10329
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
9343
|
-
|
10330
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
|
9344
10331
|
break;
|
9345
10332
|
case PM_TOKEN_GLOBAL_VARIABLE:
|
9346
|
-
|
10333
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
|
9347
10334
|
break;
|
9348
10335
|
case PM_TOKEN_CLASS_VARIABLE:
|
9349
|
-
|
10336
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
|
9350
10337
|
break;
|
9351
10338
|
case PM_TOKEN_METHOD_NAME:
|
9352
|
-
|
10339
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
|
9353
10340
|
break;
|
9354
10341
|
default: break;
|
9355
10342
|
}
|
@@ -9466,7 +10453,7 @@ parse_parameters(
|
|
9466
10453
|
if (params->rest == NULL) {
|
9467
10454
|
pm_parameters_node_rest_set(params, param);
|
9468
10455
|
} else {
|
9469
|
-
|
10456
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
|
9470
10457
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9471
10458
|
}
|
9472
10459
|
|
@@ -9500,7 +10487,7 @@ parse_parameters(
|
|
9500
10487
|
if (params->keyword_rest == NULL) {
|
9501
10488
|
pm_parameters_node_keyword_rest_set(params, param);
|
9502
10489
|
} else {
|
9503
|
-
|
10490
|
+
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
|
9504
10491
|
pm_parameters_node_posts_append(params, param);
|
9505
10492
|
}
|
9506
10493
|
|
@@ -9518,11 +10505,11 @@ parse_parameters(
|
|
9518
10505
|
if (params->rest == NULL) {
|
9519
10506
|
pm_parameters_node_rest_set(params, param);
|
9520
10507
|
} else {
|
9521
|
-
|
10508
|
+
pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
|
9522
10509
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
9523
10510
|
}
|
9524
10511
|
} else {
|
9525
|
-
|
10512
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
|
9526
10513
|
}
|
9527
10514
|
}
|
9528
10515
|
|
@@ -9725,9 +10712,10 @@ parse_block_parameters(
|
|
9725
10712
|
}
|
9726
10713
|
|
9727
10714
|
pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
|
9728
|
-
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
10715
|
+
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
9729
10716
|
do {
|
9730
10717
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
10718
|
+
pm_parser_parameter_name_check(parser, &parser->previous);
|
9731
10719
|
pm_parser_local_add_token(parser, &parser->previous);
|
9732
10720
|
|
9733
10721
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
@@ -9850,7 +10838,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
9850
10838
|
if (arguments->block == NULL) {
|
9851
10839
|
arguments->block = (pm_node_t *) block;
|
9852
10840
|
} else {
|
9853
|
-
|
10841
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
9854
10842
|
if (arguments->arguments == NULL) {
|
9855
10843
|
arguments->arguments = pm_arguments_node_create(parser);
|
9856
10844
|
}
|
@@ -9873,7 +10861,7 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
|
|
9873
10861
|
bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
9874
10862
|
predicate_closed |= accept1(parser, PM_TOKEN_KEYWORD_THEN);
|
9875
10863
|
if (!predicate_closed) {
|
9876
|
-
|
10864
|
+
pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
|
9877
10865
|
}
|
9878
10866
|
|
9879
10867
|
context_pop(parser);
|
@@ -10057,25 +11045,12 @@ parse_string_part(pm_parser_t *parser) {
|
|
10057
11045
|
// "aaa #{bbb} #@ccc ddd"
|
10058
11046
|
// ^^^^ ^ ^^^^
|
10059
11047
|
case PM_TOKEN_STRING_CONTENT: {
|
10060
|
-
pm_unescape_type_t unescape_type = PM_UNESCAPE_ALL;
|
10061
|
-
|
10062
|
-
if (parser->lex_modes.current->mode == PM_LEX_HEREDOC) {
|
10063
|
-
if (parser->lex_modes.current->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
10064
|
-
// If we're in a tilde heredoc, we want to unescape it later
|
10065
|
-
// because we don't want unescaped newlines to disappear
|
10066
|
-
// before we handle them in the dedent.
|
10067
|
-
unescape_type = PM_UNESCAPE_NONE;
|
10068
|
-
} else if (parser->lex_modes.current->as.heredoc.quote == PM_HEREDOC_QUOTE_SINGLE) {
|
10069
|
-
unescape_type = PM_UNESCAPE_MINIMAL;
|
10070
|
-
}
|
10071
|
-
}
|
10072
|
-
|
10073
|
-
parser_lex(parser);
|
10074
|
-
|
10075
11048
|
pm_token_t opening = not_provided(parser);
|
10076
11049
|
pm_token_t closing = not_provided(parser);
|
11050
|
+
pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
10077
11051
|
|
10078
|
-
|
11052
|
+
parser_lex(parser);
|
11053
|
+
return node;
|
10079
11054
|
}
|
10080
11055
|
// Here the lexer has returned the beginning of an embedded expression. In
|
10081
11056
|
// that case we'll parse the inner statements and return that as the part.
|
@@ -10166,7 +11141,7 @@ parse_string_part(pm_parser_t *parser) {
|
|
10166
11141
|
}
|
10167
11142
|
default:
|
10168
11143
|
parser_lex(parser);
|
10169
|
-
|
11144
|
+
pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
|
10170
11145
|
return NULL;
|
10171
11146
|
}
|
10172
11147
|
}
|
@@ -10177,7 +11152,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10177
11152
|
|
10178
11153
|
if (lex_mode->mode != PM_LEX_STRING) {
|
10179
11154
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
10180
|
-
pm_token_t symbol;
|
10181
11155
|
|
10182
11156
|
switch (parser->current.type) {
|
10183
11157
|
case PM_TOKEN_IDENTIFIER:
|
@@ -10190,21 +11164,21 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10190
11164
|
case PM_TOKEN_BACK_REFERENCE:
|
10191
11165
|
case PM_CASE_KEYWORD:
|
10192
11166
|
parser_lex(parser);
|
10193
|
-
symbol = parser->previous;
|
10194
11167
|
break;
|
10195
11168
|
case PM_CASE_OPERATOR:
|
10196
11169
|
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
10197
11170
|
parser_lex(parser);
|
10198
|
-
symbol = parser->previous;
|
10199
11171
|
break;
|
10200
11172
|
default:
|
10201
11173
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
10202
|
-
symbol = parser->previous;
|
10203
11174
|
break;
|
10204
11175
|
}
|
10205
11176
|
|
10206
11177
|
pm_token_t closing = not_provided(parser);
|
10207
|
-
|
11178
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
11179
|
+
|
11180
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11181
|
+
return (pm_node_t *) symbol;
|
10208
11182
|
}
|
10209
11183
|
|
10210
11184
|
if (lex_mode->as.string.interpolation) {
|
@@ -10215,7 +11189,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10215
11189
|
|
10216
11190
|
pm_token_t content = not_provided(parser);
|
10217
11191
|
pm_token_t closing = parser->previous;
|
10218
|
-
return (pm_node_t *)
|
11192
|
+
return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
|
10219
11193
|
}
|
10220
11194
|
|
10221
11195
|
// Now we can parse the first part of the symbol.
|
@@ -10248,18 +11222,23 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
10248
11222
|
}
|
10249
11223
|
|
10250
11224
|
pm_token_t content;
|
10251
|
-
|
10252
|
-
|
11225
|
+
pm_string_t unescaped;
|
11226
|
+
|
11227
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
11228
|
+
content = parser->current;
|
11229
|
+
unescaped = parser->current_string;
|
11230
|
+
parser_lex(parser);
|
10253
11231
|
} else {
|
10254
11232
|
content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
|
11233
|
+
pm_string_shared_init(&unescaped, content.start, content.end);
|
10255
11234
|
}
|
10256
11235
|
|
10257
11236
|
if (next_state != PM_LEX_STATE_NONE) {
|
10258
11237
|
lex_state_set(parser, next_state);
|
10259
11238
|
}
|
10260
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
10261
11239
|
|
10262
|
-
|
11240
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
11241
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
10263
11242
|
}
|
10264
11243
|
|
10265
11244
|
// Parse an argument to undef which can either be a bare word, a
|
@@ -10276,8 +11255,10 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
10276
11255
|
|
10277
11256
|
pm_token_t opening = not_provided(parser);
|
10278
11257
|
pm_token_t closing = not_provided(parser);
|
11258
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
10279
11259
|
|
10280
|
-
|
11260
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11261
|
+
return (pm_node_t *) symbol;
|
10281
11262
|
}
|
10282
11263
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
10283
11264
|
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
|
@@ -10286,7 +11267,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
10286
11267
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE);
|
10287
11268
|
}
|
10288
11269
|
default:
|
10289
|
-
|
11270
|
+
pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
|
10290
11271
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
10291
11272
|
}
|
10292
11273
|
}
|
@@ -10310,8 +11291,10 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
10310
11291
|
parser_lex(parser);
|
10311
11292
|
pm_token_t opening = not_provided(parser);
|
10312
11293
|
pm_token_t closing = not_provided(parser);
|
11294
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
10313
11295
|
|
10314
|
-
|
11296
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
11297
|
+
return (pm_node_t *) symbol;
|
10315
11298
|
}
|
10316
11299
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
10317
11300
|
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
|
@@ -10329,7 +11312,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
10329
11312
|
parser_lex(parser);
|
10330
11313
|
return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
|
10331
11314
|
default:
|
10332
|
-
|
11315
|
+
pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
|
10333
11316
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
10334
11317
|
}
|
10335
11318
|
}
|
@@ -10366,9 +11349,9 @@ parse_variable_call(pm_parser_t *parser) {
|
|
10366
11349
|
// local variable read. If it's not, then we'll create a normal call
|
10367
11350
|
// node but add an error.
|
10368
11351
|
if (parser->current_scope->explicit_params) {
|
10369
|
-
|
11352
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
10370
11353
|
} else if (outer_scope_using_numbered_params_p(parser)) {
|
10371
|
-
|
11354
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
10372
11355
|
} else {
|
10373
11356
|
// When you use a numbered parameter, it implies the existence
|
10374
11357
|
// of all of the locals that exist before it. For example,
|
@@ -10421,76 +11404,8 @@ parse_method_definition_name(pm_parser_t *parser) {
|
|
10421
11404
|
}
|
10422
11405
|
}
|
10423
11406
|
|
10424
|
-
static
|
10425
|
-
|
10426
|
-
{
|
10427
|
-
const pm_location_t *content_loc = &((pm_string_node_t *) node)->content_loc;
|
10428
|
-
int cur_whitespace;
|
10429
|
-
const uint8_t *cur_char = content_loc->start;
|
10430
|
-
|
10431
|
-
while (cur_char && cur_char < content_loc->end) {
|
10432
|
-
// Any empty newlines aren't included in the minimum whitespace
|
10433
|
-
// calculation.
|
10434
|
-
size_t eol_length;
|
10435
|
-
while ((eol_length = match_eol_at(parser, cur_char))) {
|
10436
|
-
cur_char += eol_length;
|
10437
|
-
}
|
10438
|
-
|
10439
|
-
if (cur_char == content_loc->end) break;
|
10440
|
-
|
10441
|
-
cur_whitespace = 0;
|
10442
|
-
|
10443
|
-
while (pm_char_is_inline_whitespace(*cur_char) && cur_char < content_loc->end) {
|
10444
|
-
if (cur_char[0] == '\t') {
|
10445
|
-
cur_whitespace = (cur_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
10446
|
-
} else {
|
10447
|
-
cur_whitespace++;
|
10448
|
-
}
|
10449
|
-
cur_char++;
|
10450
|
-
}
|
10451
|
-
|
10452
|
-
// If we hit a newline, then we have encountered a line that
|
10453
|
-
// contains only whitespace, and it shouldn't be considered in
|
10454
|
-
// the calculation of common leading whitespace.
|
10455
|
-
eol_length = match_eol_at(parser, cur_char);
|
10456
|
-
if (eol_length) {
|
10457
|
-
cur_char += eol_length;
|
10458
|
-
continue;
|
10459
|
-
}
|
10460
|
-
|
10461
|
-
if (cur_whitespace < common_whitespace || common_whitespace == -1) {
|
10462
|
-
common_whitespace = cur_whitespace;
|
10463
|
-
}
|
10464
|
-
|
10465
|
-
cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
|
10466
|
-
if (cur_char) cur_char++;
|
10467
|
-
}
|
10468
|
-
return common_whitespace;
|
10469
|
-
}
|
10470
|
-
|
10471
|
-
// Calculate the common leading whitespace for each line in a heredoc.
|
10472
|
-
static int
|
10473
|
-
parse_heredoc_common_whitespace(pm_parser_t *parser, pm_node_list_t *nodes) {
|
10474
|
-
int common_whitespace = -1;
|
10475
|
-
|
10476
|
-
for (size_t index = 0; index < nodes->size; index++) {
|
10477
|
-
pm_node_t *node = nodes->nodes[index];
|
10478
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) continue;
|
10479
|
-
|
10480
|
-
// If the previous node wasn't a string node, we don't want to trim
|
10481
|
-
// whitespace. This could happen after an interpolated expression or
|
10482
|
-
// variable.
|
10483
|
-
if (index == 0 || PM_NODE_TYPE_P(nodes->nodes[index - 1], PM_STRING_NODE)) {
|
10484
|
-
common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, common_whitespace);
|
10485
|
-
}
|
10486
|
-
}
|
10487
|
-
|
10488
|
-
return common_whitespace;
|
10489
|
-
}
|
10490
|
-
|
10491
|
-
static pm_string_t *
|
10492
|
-
parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool dedent_node, int common_whitespace, pm_heredoc_quote_t quote)
|
10493
|
-
{
|
11407
|
+
static void
|
11408
|
+
parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
|
10494
11409
|
// Get a reference to the string struct that is being held by the string
|
10495
11410
|
// node. This is the value we're going to actually manipulate.
|
10496
11411
|
pm_string_ensure_owned(string);
|
@@ -10499,85 +11414,37 @@ parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool
|
|
10499
11414
|
// destination to move bytes into. We'll also use it for bounds checking
|
10500
11415
|
// since we don't require that these strings be null terminated.
|
10501
11416
|
size_t dest_length = pm_string_length(string);
|
10502
|
-
uint8_t *
|
10503
|
-
|
10504
|
-
const uint8_t *source_cursor = source_start;
|
11417
|
+
const uint8_t *source_cursor = (uint8_t *) string->source;
|
10505
11418
|
const uint8_t *source_end = source_cursor + dest_length;
|
10506
11419
|
|
10507
11420
|
// We're going to move bytes backward in the string when we get leading
|
10508
11421
|
// whitespace, so we'll maintain a pointer to the current position in the
|
10509
11422
|
// string that we're writing to.
|
10510
|
-
|
10511
|
-
|
10512
|
-
|
10513
|
-
|
10514
|
-
|
10515
|
-
|
10516
|
-
|
10517
|
-
|
10518
|
-
|
10519
|
-
|
10520
|
-
|
10521
|
-
|
10522
|
-
while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
|
10523
|
-
if (*source_cursor == '\t') {
|
10524
|
-
trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
10525
|
-
if (trimmed_whitespace > common_whitespace) break;
|
10526
|
-
} else {
|
10527
|
-
trimmed_whitespace++;
|
10528
|
-
}
|
10529
|
-
|
10530
|
-
source_cursor++;
|
10531
|
-
dest_length--;
|
10532
|
-
}
|
10533
|
-
}
|
10534
|
-
|
10535
|
-
// At this point we have dedented all that we need to, so we need to find
|
10536
|
-
// the next newline.
|
10537
|
-
const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
|
10538
|
-
|
10539
|
-
if (breakpoint == NULL) {
|
10540
|
-
// If there isn't another newline, then we can just move the rest of the
|
10541
|
-
// string and break from the loop.
|
10542
|
-
memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
|
10543
|
-
break;
|
11423
|
+
size_t trimmed_whitespace = 0;
|
11424
|
+
|
11425
|
+
// While we haven't reached the amount of common whitespace that we need to
|
11426
|
+
// trim and we haven't reached the end of the string, we'll keep trimming
|
11427
|
+
// whitespace. Trimming in this context means skipping over these bytes such
|
11428
|
+
// that they aren't copied into the new string.
|
11429
|
+
while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
|
11430
|
+
if (*source_cursor == '\t') {
|
11431
|
+
trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
|
11432
|
+
if (trimmed_whitespace > common_whitespace) break;
|
11433
|
+
} else {
|
11434
|
+
trimmed_whitespace++;
|
10544
11435
|
}
|
10545
11436
|
|
10546
|
-
|
10547
|
-
|
10548
|
-
if (breakpoint < source_end) breakpoint++;
|
10549
|
-
memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
|
10550
|
-
dest_cursor += (breakpoint - source_cursor);
|
10551
|
-
source_cursor = breakpoint;
|
10552
|
-
dedent_node = true;
|
11437
|
+
source_cursor++;
|
11438
|
+
dest_length--;
|
10553
11439
|
}
|
10554
11440
|
|
10555
|
-
|
11441
|
+
memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
|
10556
11442
|
string->length = dest_length;
|
10557
|
-
|
10558
|
-
if (dest_length != 0) {
|
10559
|
-
pm_unescape_manipulate_string(parser, string, (quote == PM_HEREDOC_QUOTE_SINGLE) ? PM_UNESCAPE_MINIMAL : PM_UNESCAPE_ALL);
|
10560
|
-
}
|
10561
|
-
return string;
|
10562
11443
|
}
|
10563
11444
|
|
10564
11445
|
// Take a heredoc node that is indented by a ~ and trim the leading whitespace.
|
10565
11446
|
static void
|
10566
|
-
parse_heredoc_dedent(pm_parser_t *parser,
|
10567
|
-
{
|
10568
|
-
pm_node_list_t *nodes;
|
10569
|
-
|
10570
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
10571
|
-
nodes = &((pm_interpolated_x_string_node_t *) heredoc_node)->parts;
|
10572
|
-
} else {
|
10573
|
-
nodes = &((pm_interpolated_string_node_t *) heredoc_node)->parts;
|
10574
|
-
}
|
10575
|
-
|
10576
|
-
// First, calculate how much common whitespace we need to trim. If there is
|
10577
|
-
// none or it's 0, then we can return early.
|
10578
|
-
int common_whitespace;
|
10579
|
-
if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;
|
10580
|
-
|
11447
|
+
parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
|
10581
11448
|
// The next node should be dedented if it's the first node in the list or if
|
10582
11449
|
// if follows a string node.
|
10583
11450
|
bool dedent_next = true;
|
@@ -10600,7 +11467,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_t *heredoc_node, pm_heredoc_qu
|
|
10600
11467
|
}
|
10601
11468
|
|
10602
11469
|
pm_string_node_t *string_node = ((pm_string_node_t *) node);
|
10603
|
-
|
11470
|
+
if (dedent_next) {
|
11471
|
+
parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
|
11472
|
+
}
|
11473
|
+
|
10604
11474
|
if (string_node->unescaped.length == 0) {
|
10605
11475
|
pm_node_destroy(parser, node);
|
10606
11476
|
} else {
|
@@ -10929,13 +11799,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
10929
11799
|
case PM_TOKEN_STRING_BEGIN:
|
10930
11800
|
key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
|
10931
11801
|
if (!pm_symbol_node_label_p(key)) {
|
10932
|
-
|
11802
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
10933
11803
|
}
|
10934
11804
|
|
10935
11805
|
break;
|
10936
11806
|
default:
|
10937
11807
|
parser_lex(parser);
|
10938
|
-
|
11808
|
+
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
10939
11809
|
key = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
10940
11810
|
break;
|
10941
11811
|
}
|
@@ -10970,7 +11840,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
10970
11840
|
return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
10971
11841
|
}
|
10972
11842
|
default: {
|
10973
|
-
|
11843
|
+
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
|
10974
11844
|
pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
|
10975
11845
|
return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
10976
11846
|
}
|
@@ -11058,7 +11928,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11058
11928
|
default: {
|
11059
11929
|
// If we get here, then we have a pin operator followed by something
|
11060
11930
|
// not understood. We'll create a missing node and return that.
|
11061
|
-
|
11931
|
+
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
|
11062
11932
|
pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
|
11063
11933
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
11064
11934
|
}
|
@@ -11082,7 +11952,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11082
11952
|
return parse_pattern_constant_path(parser, node);
|
11083
11953
|
}
|
11084
11954
|
default:
|
11085
|
-
|
11955
|
+
pm_parser_err_current(parser, diag_id);
|
11086
11956
|
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
11087
11957
|
}
|
11088
11958
|
}
|
@@ -11126,7 +11996,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
11126
11996
|
break;
|
11127
11997
|
}
|
11128
11998
|
default: {
|
11129
|
-
|
11999
|
+
pm_parser_err_current(parser, diag_id);
|
11130
12000
|
pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
11131
12001
|
|
11132
12002
|
if (node == NULL) {
|
@@ -11218,7 +12088,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
11218
12088
|
// will continue to parse the rest of the patterns, but we will indicate
|
11219
12089
|
// it as an error.
|
11220
12090
|
if (trailing_rest) {
|
11221
|
-
|
12091
|
+
pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
|
11222
12092
|
}
|
11223
12093
|
|
11224
12094
|
trailing_rest = true;
|
@@ -11284,6 +12154,7 @@ static inline pm_node_t *
|
|
11284
12154
|
parse_strings(pm_parser_t *parser) {
|
11285
12155
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
11286
12156
|
pm_node_t *result = NULL;
|
12157
|
+
bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
11287
12158
|
|
11288
12159
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
11289
12160
|
pm_node_t *node = NULL;
|
@@ -11301,17 +12172,30 @@ parse_strings(pm_parser_t *parser) {
|
|
11301
12172
|
// start. In that case we'll create an empty content token and
|
11302
12173
|
// return an uninterpolated string.
|
11303
12174
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11304
|
-
|
12175
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
12176
|
+
|
12177
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
12178
|
+
node = (pm_node_t *) string;
|
11305
12179
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11306
12180
|
// If we get here, then we have an end of a label immediately
|
11307
12181
|
// after a start. In that case we'll create an empty symbol
|
11308
12182
|
// node.
|
11309
12183
|
pm_token_t opening = not_provided(parser);
|
11310
12184
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11311
|
-
|
12185
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
12186
|
+
|
12187
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
12188
|
+
node = (pm_node_t *) symbol;
|
11312
12189
|
} else if (!lex_interpolation) {
|
11313
12190
|
// If we don't accept interpolation then we expect the string to
|
11314
12191
|
// start with a single string content node.
|
12192
|
+
pm_string_t unescaped;
|
12193
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
12194
|
+
unescaped = PM_EMPTY_STRING;
|
12195
|
+
} else {
|
12196
|
+
unescaped = parser->current_string;
|
12197
|
+
}
|
12198
|
+
|
11315
12199
|
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
11316
12200
|
pm_token_t content = parser->previous;
|
11317
12201
|
|
@@ -11330,21 +12214,22 @@ parse_strings(pm_parser_t *parser) {
|
|
11330
12214
|
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
|
11331
12215
|
|
11332
12216
|
pm_token_t delimiters = not_provided(parser);
|
11333
|
-
pm_node_t *part = (pm_node_t *)
|
12217
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
11334
12218
|
pm_node_list_append(&parts, part);
|
11335
12219
|
|
11336
|
-
|
11337
|
-
part = (pm_node_t *)
|
12220
|
+
do {
|
12221
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
11338
12222
|
pm_node_list_append(&parts, part);
|
11339
|
-
|
12223
|
+
parser_lex(parser);
|
12224
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
11340
12225
|
|
11341
12226
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
11342
12227
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
11343
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11344
|
-
node = (pm_node_t *)
|
12228
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
12229
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11345
12230
|
} else {
|
11346
12231
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
11347
|
-
node = (pm_node_t *)
|
12232
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11348
12233
|
}
|
11349
12234
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
11350
12235
|
// In this case we've hit string content so we know the string
|
@@ -11352,12 +12237,14 @@ parse_strings(pm_parser_t *parser) {
|
|
11352
12237
|
// following token is the end (in which case we can return a
|
11353
12238
|
// plain string) or if it's not then it has interpolation.
|
11354
12239
|
pm_token_t content = parser->current;
|
12240
|
+
pm_string_t unescaped = parser->current_string;
|
11355
12241
|
parser_lex(parser);
|
11356
12242
|
|
11357
|
-
if (
|
11358
|
-
node = (pm_node_t *)
|
12243
|
+
if (match1(parser, PM_TOKEN_STRING_END)) {
|
12244
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
12245
|
+
parser_lex(parser);
|
11359
12246
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
11360
|
-
node = (pm_node_t *)
|
12247
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
11361
12248
|
} else {
|
11362
12249
|
// If we get here, then we have interpolation so we'll need
|
11363
12250
|
// to create a string or symbol node with interpolation.
|
@@ -11365,7 +12252,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11365
12252
|
pm_token_t string_opening = not_provided(parser);
|
11366
12253
|
pm_token_t string_closing = not_provided(parser);
|
11367
12254
|
|
11368
|
-
pm_node_t *part = (pm_node_t *)
|
12255
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
11369
12256
|
pm_node_list_append(&parts, part);
|
11370
12257
|
|
11371
12258
|
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
@@ -11374,7 +12261,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11374
12261
|
}
|
11375
12262
|
}
|
11376
12263
|
|
11377
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
12264
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
11378
12265
|
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
11379
12266
|
} else {
|
11380
12267
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
@@ -11382,11 +12269,11 @@ parse_strings(pm_parser_t *parser) {
|
|
11382
12269
|
}
|
11383
12270
|
}
|
11384
12271
|
} else {
|
11385
|
-
// If we get here, then the first part of the string is not
|
11386
|
-
//
|
11387
|
-
//
|
12272
|
+
// If we get here, then the first part of the string is not plain
|
12273
|
+
// string content, in which case we need to parse the string as an
|
12274
|
+
// interpolated string.
|
11388
12275
|
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
|
11389
|
-
pm_node_t *part
|
12276
|
+
pm_node_t *part;
|
11390
12277
|
|
11391
12278
|
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
11392
12279
|
if ((part = parse_string_part(parser)) != NULL) {
|
@@ -11418,7 +12305,7 @@ parse_strings(pm_parser_t *parser) {
|
|
11418
12305
|
// If it cannot be concatenated with the previous node, then we'll
|
11419
12306
|
// need to add a syntax error.
|
11420
12307
|
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
11421
|
-
|
12308
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
11422
12309
|
}
|
11423
12310
|
|
11424
12311
|
// Either way we will create a concat node to hold the strings
|
@@ -11464,7 +12351,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11464
12351
|
element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
|
11465
12352
|
} else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
|
11466
12353
|
if (parsed_bare_hash) {
|
11467
|
-
|
12354
|
+
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
|
11468
12355
|
}
|
11469
12356
|
|
11470
12357
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -11480,7 +12367,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11480
12367
|
|
11481
12368
|
if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
11482
12369
|
if (parsed_bare_hash) {
|
11483
|
-
|
12370
|
+
pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
|
11484
12371
|
}
|
11485
12372
|
|
11486
12373
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
@@ -11598,7 +12485,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11598
12485
|
// If we didn't find a terminator and we didn't find a right
|
11599
12486
|
// parenthesis, then this is a syntax error.
|
11600
12487
|
if (!terminator_found) {
|
11601
|
-
|
12488
|
+
pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
|
11602
12489
|
}
|
11603
12490
|
|
11604
12491
|
// Parse each statement within the parentheses.
|
@@ -11627,7 +12514,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11627
12514
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
11628
12515
|
break;
|
11629
12516
|
} else {
|
11630
|
-
|
12517
|
+
pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
|
11631
12518
|
}
|
11632
12519
|
}
|
11633
12520
|
|
@@ -11665,7 +12552,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11665
12552
|
content.start = content.start + 1;
|
11666
12553
|
|
11667
12554
|
pm_token_t closing = not_provided(parser);
|
11668
|
-
pm_node_t *node = (pm_node_t *)
|
12555
|
+
pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
|
11669
12556
|
|
11670
12557
|
// Characters can be followed by strings in which case they are
|
11671
12558
|
// automatically concatenated.
|
@@ -11839,9 +12726,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11839
12726
|
case PM_TOKEN_HEREDOC_START: {
|
11840
12727
|
// Here we have found a heredoc. We'll parse it and add it to the
|
11841
12728
|
// list of strings.
|
11842
|
-
|
11843
|
-
|
11844
|
-
|
12729
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
12730
|
+
assert(lex_mode->mode == PM_LEX_HEREDOC);
|
12731
|
+
pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
|
12732
|
+
pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
|
11845
12733
|
|
11846
12734
|
parser_lex(parser);
|
11847
12735
|
pm_token_t opening = parser->previous;
|
@@ -11857,9 +12745,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11857
12745
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
11858
12746
|
|
11859
12747
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
11860
|
-
node = (pm_node_t *)
|
12748
|
+
node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
|
11861
12749
|
} else {
|
11862
|
-
node = (pm_node_t *)
|
12750
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
|
11863
12751
|
}
|
11864
12752
|
|
11865
12753
|
node->location.end = opening.end;
|
@@ -11884,15 +12772,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11884
12772
|
cast->base.type = PM_X_STRING_NODE;
|
11885
12773
|
}
|
11886
12774
|
|
11887
|
-
|
11888
|
-
|
12775
|
+
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
12776
|
+
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
12777
|
+
parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
|
12778
|
+
}
|
11889
12779
|
|
11890
12780
|
node = (pm_node_t *) cast;
|
11891
|
-
|
11892
|
-
|
11893
|
-
int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
|
11894
|
-
parse_heredoc_dedent_single_node(parser, &cast->unescaped, true, common_whitespace, quote);
|
11895
|
-
}
|
12781
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
12782
|
+
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
11896
12783
|
} else {
|
11897
12784
|
// If we get here, then we have multiple parts in the heredoc,
|
11898
12785
|
// so we'll need to create an interpolated string node to hold
|
@@ -11931,8 +12818,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11931
12818
|
|
11932
12819
|
// If this is a heredoc that is indented with a ~, then we need
|
11933
12820
|
// to dedent each line by the common leading whitespace.
|
11934
|
-
|
11935
|
-
|
12821
|
+
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
12822
|
+
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
12823
|
+
pm_node_list_t *nodes;
|
12824
|
+
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
12825
|
+
nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
|
12826
|
+
} else {
|
12827
|
+
nodes = &((pm_interpolated_string_node_t *) node)->parts;
|
12828
|
+
}
|
12829
|
+
|
12830
|
+
parse_heredoc_dedent(parser, nodes, common_whitespace);
|
11936
12831
|
}
|
11937
12832
|
}
|
11938
12833
|
|
@@ -11995,10 +12890,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
11995
12890
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11996
12891
|
if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
|
11997
12892
|
if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
|
11998
|
-
|
12893
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
11999
12894
|
}
|
12000
12895
|
} else {
|
12001
|
-
|
12896
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
12002
12897
|
}
|
12003
12898
|
|
12004
12899
|
return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
|
@@ -12006,7 +12901,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12006
12901
|
case PM_SYMBOL_NODE:
|
12007
12902
|
case PM_INTERPOLATED_SYMBOL_NODE: {
|
12008
12903
|
if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
|
12009
|
-
|
12904
|
+
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
12010
12905
|
}
|
12011
12906
|
}
|
12012
12907
|
/* fallthrough */
|
@@ -12032,7 +12927,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12032
12927
|
}
|
12033
12928
|
|
12034
12929
|
if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
|
12035
|
-
|
12930
|
+
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
|
12036
12931
|
return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
|
12037
12932
|
}
|
12038
12933
|
|
@@ -12142,7 +13037,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12142
13037
|
// If we didn't parse any conditions (in or when) then we need to
|
12143
13038
|
// indicate that we have an error.
|
12144
13039
|
if (case_node->conditions.size == 0) {
|
12145
|
-
|
13040
|
+
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
|
12146
13041
|
}
|
12147
13042
|
|
12148
13043
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
@@ -12185,12 +13080,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12185
13080
|
pm_begin_node_end_keyword_set(begin_node, &parser->previous);
|
12186
13081
|
|
12187
13082
|
if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
|
12188
|
-
|
12189
|
-
&parser->error_list,
|
12190
|
-
begin_node->else_clause->base.location.start,
|
12191
|
-
begin_node->else_clause->base.location.end,
|
12192
|
-
PM_ERR_BEGIN_LONELY_ELSE
|
12193
|
-
);
|
13083
|
+
pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE);
|
12194
13084
|
}
|
12195
13085
|
|
12196
13086
|
return (pm_node_t *) begin_node;
|
@@ -12206,7 +13096,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12206
13096
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
|
12207
13097
|
pm_context_t context = parser->current_context->context;
|
12208
13098
|
if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
|
12209
|
-
|
13099
|
+
pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
|
12210
13100
|
}
|
12211
13101
|
return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
|
12212
13102
|
}
|
@@ -12239,7 +13129,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12239
13129
|
(parser->current_context->context == PM_CONTEXT_CLASS) ||
|
12240
13130
|
(parser->current_context->context == PM_CONTEXT_MODULE)
|
12241
13131
|
) {
|
12242
|
-
|
13132
|
+
pm_parser_err_current(parser, PM_ERR_RETURN_INVALID);
|
12243
13133
|
}
|
12244
13134
|
return (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
|
12245
13135
|
}
|
@@ -12305,7 +13195,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12305
13195
|
pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_CLASS_NAME);
|
12306
13196
|
pm_token_t name = parser->previous;
|
12307
13197
|
if (name.type != PM_TOKEN_CONSTANT) {
|
12308
|
-
|
13198
|
+
pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
|
12309
13199
|
}
|
12310
13200
|
|
12311
13201
|
pm_token_t inheritance_operator;
|
@@ -12346,7 +13236,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12346
13236
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
12347
13237
|
|
12348
13238
|
if (context_def_p(parser)) {
|
12349
|
-
|
13239
|
+
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
|
12350
13240
|
}
|
12351
13241
|
|
12352
13242
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
@@ -12354,7 +13244,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12354
13244
|
pm_do_loop_stack_pop(parser);
|
12355
13245
|
|
12356
13246
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
12357
|
-
|
13247
|
+
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
12358
13248
|
}
|
12359
13249
|
|
12360
13250
|
return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
|
@@ -12486,7 +13376,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12486
13376
|
// If, after all that, we were unable to find a method name, add an
|
12487
13377
|
// error to the error list.
|
12488
13378
|
if (name.type == PM_TOKEN_MISSING) {
|
12489
|
-
|
13379
|
+
pm_parser_err_previous(parser, PM_ERR_DEF_NAME);
|
12490
13380
|
}
|
12491
13381
|
|
12492
13382
|
pm_token_t lparen;
|
@@ -12538,7 +13428,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12538
13428
|
|
12539
13429
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
12540
13430
|
if (token_is_setter_name(&name)) {
|
12541
|
-
|
13431
|
+
pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
|
12542
13432
|
}
|
12543
13433
|
equal = parser->previous;
|
12544
13434
|
|
@@ -12656,6 +13546,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12656
13546
|
parser_lex(parser);
|
12657
13547
|
pm_token_t for_keyword = parser->previous;
|
12658
13548
|
pm_node_t *index;
|
13549
|
+
pm_parser_scope_push_transparent(parser);
|
12659
13550
|
|
12660
13551
|
// First, parse out the first index expression.
|
12661
13552
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -12670,7 +13561,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12670
13561
|
} else if (token_begins_expression_p(parser->current.type)) {
|
12671
13562
|
index = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
12672
13563
|
} else {
|
12673
|
-
|
13564
|
+
pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
|
12674
13565
|
index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
|
12675
13566
|
}
|
12676
13567
|
|
@@ -12681,6 +13572,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12681
13572
|
index = parse_target(parser, index);
|
12682
13573
|
}
|
12683
13574
|
|
13575
|
+
pm_parser_scope_pop(parser);
|
12684
13576
|
pm_do_loop_stack_push(parser, true);
|
12685
13577
|
|
12686
13578
|
expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
|
@@ -12700,8 +13592,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12700
13592
|
pm_statements_node_t *statements = NULL;
|
12701
13593
|
|
12702
13594
|
if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
|
13595
|
+
pm_parser_scope_push_transparent(parser);
|
12703
13596
|
statements = parse_statements(parser, PM_CONTEXT_FOR);
|
12704
13597
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
|
13598
|
+
pm_parser_scope_pop(parser);
|
12705
13599
|
}
|
12706
13600
|
|
12707
13601
|
return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
|
@@ -12797,7 +13691,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12797
13691
|
// syntax error. We handle that here as well.
|
12798
13692
|
name = parser->previous;
|
12799
13693
|
if (name.type != PM_TOKEN_CONSTANT) {
|
12800
|
-
|
13694
|
+
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
12801
13695
|
}
|
12802
13696
|
|
12803
13697
|
pm_parser_scope_push(parser, true);
|
@@ -12821,7 +13715,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12821
13715
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
12822
13716
|
|
12823
13717
|
if (context_def_p(parser)) {
|
12824
|
-
|
13718
|
+
pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
|
12825
13719
|
}
|
12826
13720
|
|
12827
13721
|
return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
|
@@ -12891,13 +13785,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12891
13785
|
accept1(parser, PM_TOKEN_WORDS_SEP);
|
12892
13786
|
if (match1(parser, PM_TOKEN_STRING_END)) break;
|
12893
13787
|
|
12894
|
-
|
12895
|
-
|
12896
|
-
|
12897
|
-
|
13788
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13789
|
+
pm_token_t opening = not_provided(parser);
|
13790
|
+
pm_token_t closing = not_provided(parser);
|
13791
|
+
pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
|
13792
|
+
}
|
12898
13793
|
|
12899
|
-
|
12900
|
-
pm_array_node_elements_append(array, symbol);
|
13794
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
|
12901
13795
|
}
|
12902
13796
|
|
12903
13797
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
|
@@ -12937,26 +13831,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
12937
13831
|
// If we hit content and the current node is NULL, then this is
|
12938
13832
|
// the first string content we've seen. In that case we're going
|
12939
13833
|
// to create a new string node and set that to the current.
|
13834
|
+
current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
|
12940
13835
|
parser_lex(parser);
|
12941
|
-
current = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
|
12942
13836
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
|
12943
13837
|
// If we hit string content and the current node is an
|
12944
13838
|
// interpolated string, then we need to append the string content
|
12945
13839
|
// to the list of child nodes.
|
12946
|
-
pm_node_t *
|
12947
|
-
|
13840
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
13841
|
+
parser_lex(parser);
|
13842
|
+
|
13843
|
+
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
|
12948
13844
|
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
|
12949
13845
|
// If we hit string content and the current node is a string node,
|
12950
13846
|
// then we need to convert the current node into an interpolated
|
12951
13847
|
// string and add the string content to the list of child nodes.
|
12952
|
-
|
12953
|
-
|
12954
|
-
pm_interpolated_symbol_node_t *interpolated =
|
12955
|
-
pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
12956
|
-
pm_interpolated_symbol_node_append(interpolated, current);
|
13848
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
|
13849
|
+
parser_lex(parser);
|
12957
13850
|
|
12958
|
-
|
12959
|
-
pm_interpolated_symbol_node_append(interpolated,
|
13851
|
+
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
13852
|
+
pm_interpolated_symbol_node_append(interpolated, current);
|
13853
|
+
pm_interpolated_symbol_node_append(interpolated, string);
|
12960
13854
|
current = (pm_node_t *) interpolated;
|
12961
13855
|
} else {
|
12962
13856
|
assert(false && "unreachable");
|
@@ -13063,12 +13957,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13063
13957
|
accept1(parser, PM_TOKEN_WORDS_SEP);
|
13064
13958
|
if (match1(parser, PM_TOKEN_STRING_END)) break;
|
13065
13959
|
|
13066
|
-
|
13960
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13961
|
+
pm_token_t opening = not_provided(parser);
|
13962
|
+
pm_token_t closing = not_provided(parser);
|
13067
13963
|
|
13068
|
-
|
13069
|
-
|
13070
|
-
|
13071
|
-
|
13964
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
13965
|
+
pm_array_node_elements_append(array, string);
|
13966
|
+
}
|
13967
|
+
|
13968
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
|
13072
13969
|
}
|
13073
13970
|
|
13074
13971
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
|
@@ -13101,29 +13998,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13101
13998
|
break;
|
13102
13999
|
}
|
13103
14000
|
case PM_TOKEN_STRING_CONTENT: {
|
14001
|
+
pm_token_t opening = not_provided(parser);
|
14002
|
+
pm_token_t closing = not_provided(parser);
|
14003
|
+
|
14004
|
+
pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
|
14005
|
+
parser_lex(parser);
|
14006
|
+
|
13104
14007
|
if (current == NULL) {
|
13105
14008
|
// If we hit content and the current node is NULL, then this is
|
13106
14009
|
// the first string content we've seen. In that case we're going
|
13107
14010
|
// to create a new string node and set that to the current.
|
13108
|
-
current =
|
14011
|
+
current = string;
|
13109
14012
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
13110
14013
|
// If we hit string content and the current node is an
|
13111
14014
|
// interpolated string, then we need to append the string content
|
13112
14015
|
// to the list of child nodes.
|
13113
|
-
|
13114
|
-
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
14016
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
13115
14017
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
13116
14018
|
// If we hit string content and the current node is a string node,
|
13117
14019
|
// then we need to convert the current node into an interpolated
|
13118
14020
|
// string and add the string content to the list of child nodes.
|
13119
|
-
|
13120
|
-
pm_token_t closing = not_provided(parser);
|
13121
|
-
pm_interpolated_string_node_t *interpolated =
|
13122
|
-
pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
14021
|
+
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
13123
14022
|
pm_interpolated_string_node_append(interpolated, current);
|
13124
|
-
|
13125
|
-
pm_node_t *part = parse_string_part(parser);
|
13126
|
-
pm_interpolated_string_node_append(interpolated, part);
|
14023
|
+
pm_interpolated_string_node_append(interpolated, string);
|
13127
14024
|
current = (pm_node_t *) interpolated;
|
13128
14025
|
} else {
|
13129
14026
|
assert(false && "unreachable");
|
@@ -13218,7 +14115,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13218
14115
|
};
|
13219
14116
|
|
13220
14117
|
parser_lex(parser);
|
13221
|
-
return (pm_node_t *)
|
14118
|
+
return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
|
13222
14119
|
}
|
13223
14120
|
|
13224
14121
|
pm_interpolated_regular_expression_node_t *node;
|
@@ -13228,6 +14125,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13228
14125
|
// expression at least has something in it. We'll need to check if the
|
13229
14126
|
// following token is the end (in which case we can return a plain
|
13230
14127
|
// regular expression) or if it's not then it has interpolation.
|
14128
|
+
pm_string_t unescaped = parser->current_string;
|
13231
14129
|
pm_token_t content = parser->current;
|
13232
14130
|
parser_lex(parser);
|
13233
14131
|
|
@@ -13235,7 +14133,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13235
14133
|
// without interpolation, which can be represented more succinctly and
|
13236
14134
|
// more easily compiled.
|
13237
14135
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
13238
|
-
return (pm_node_t *)
|
14136
|
+
return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13239
14137
|
}
|
13240
14138
|
|
13241
14139
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -13244,7 +14142,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13244
14142
|
|
13245
14143
|
pm_token_t opening = not_provided(parser);
|
13246
14144
|
pm_token_t closing = not_provided(parser);
|
13247
|
-
pm_node_t *part = (pm_node_t *)
|
14145
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
13248
14146
|
pm_interpolated_regular_expression_node_append(node, part);
|
13249
14147
|
} else {
|
13250
14148
|
// If the first part of the body of the regular expression is not a
|
@@ -13255,9 +14153,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13255
14153
|
|
13256
14154
|
// Now that we're here and we have interpolation, we'll parse all of the
|
13257
14155
|
// parts into the list.
|
14156
|
+
pm_node_t *part;
|
13258
14157
|
while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
|
13259
|
-
|
13260
|
-
if (part != NULL) {
|
14158
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
13261
14159
|
pm_interpolated_regular_expression_node_append(node, part);
|
13262
14160
|
}
|
13263
14161
|
}
|
@@ -13293,35 +14191,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
|
|
13293
14191
|
pm_interpolated_x_string_node_t *node;
|
13294
14192
|
|
13295
14193
|
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
13296
|
-
// In this case we've hit string content so we know the string
|
13297
|
-
// has something in it. We'll need to check if the
|
13298
|
-
// the end (in which case we can return a
|
13299
|
-
// then it has interpolation.
|
14194
|
+
// In this case we've hit string content so we know the string
|
14195
|
+
// at least has something in it. We'll need to check if the
|
14196
|
+
// following token is the end (in which case we can return a
|
14197
|
+
// plain string) or if it's not then it has interpolation.
|
14198
|
+
pm_string_t unescaped = parser->current_string;
|
13300
14199
|
pm_token_t content = parser->current;
|
13301
14200
|
parser_lex(parser);
|
13302
14201
|
|
13303
14202
|
if (accept1(parser, PM_TOKEN_STRING_END)) {
|
13304
|
-
return (pm_node_t *)
|
14203
|
+
return (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13305
14204
|
}
|
13306
14205
|
|
13307
|
-
// If we get here, then we have interpolation so we'll need to
|
13308
|
-
// a string node with interpolation.
|
14206
|
+
// If we get here, then we have interpolation so we'll need to
|
14207
|
+
// create a string node with interpolation.
|
13309
14208
|
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
13310
14209
|
|
13311
14210
|
pm_token_t opening = not_provided(parser);
|
13312
14211
|
pm_token_t closing = not_provided(parser);
|
13313
|
-
pm_node_t *part = (pm_node_t *)
|
14212
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
14213
|
+
|
13314
14214
|
pm_interpolated_xstring_node_append(node, part);
|
13315
14215
|
} else {
|
13316
|
-
// If the first part of the body of the string is not a string
|
13317
|
-
// then we have interpolation and we need to create an
|
13318
|
-
// string node.
|
14216
|
+
// If the first part of the body of the string is not a string
|
14217
|
+
// content, then we have interpolation and we need to create an
|
14218
|
+
// interpolated string node.
|
13319
14219
|
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
13320
14220
|
}
|
13321
14221
|
|
14222
|
+
pm_node_t *part;
|
13322
14223
|
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13323
|
-
|
13324
|
-
if (part != NULL) {
|
14224
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
13325
14225
|
pm_interpolated_xstring_node_append(node, part);
|
13326
14226
|
}
|
13327
14227
|
}
|
@@ -13542,7 +14442,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
|
|
13542
14442
|
static void
|
13543
14443
|
parse_call_operator_write_block(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
|
13544
14444
|
if (call_node->block != NULL) {
|
13545
|
-
|
14445
|
+
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
|
13546
14446
|
pm_node_destroy(parser, (pm_node_t *) call_node->block);
|
13547
14447
|
call_node->block = NULL;
|
13548
14448
|
}
|
@@ -13590,7 +14490,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13590
14490
|
// In this case we have an = sign, but we don't know what it's for. We
|
13591
14491
|
// need to treat it as an error. For now, we'll mark it as an error
|
13592
14492
|
// and just skip right past it.
|
13593
|
-
|
14493
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
|
13594
14494
|
return node;
|
13595
14495
|
}
|
13596
14496
|
}
|
@@ -13598,7 +14498,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13598
14498
|
switch (PM_NODE_TYPE(node)) {
|
13599
14499
|
case PM_BACK_REFERENCE_READ_NODE:
|
13600
14500
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13601
|
-
|
14501
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13602
14502
|
/* fallthrough */
|
13603
14503
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13604
14504
|
parser_lex(parser);
|
@@ -13661,7 +14561,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13661
14561
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13662
14562
|
|
13663
14563
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13664
|
-
|
14564
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13665
14565
|
}
|
13666
14566
|
|
13667
14567
|
parser_lex(parser);
|
@@ -13683,7 +14583,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13683
14583
|
}
|
13684
14584
|
case PM_MULTI_WRITE_NODE: {
|
13685
14585
|
parser_lex(parser);
|
13686
|
-
|
14586
|
+
pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
|
13687
14587
|
return node;
|
13688
14588
|
}
|
13689
14589
|
default:
|
@@ -13692,7 +14592,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13692
14592
|
// In this case we have an &&= sign, but we don't know what it's for.
|
13693
14593
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13694
14594
|
// and just skip right past it.
|
13695
|
-
|
14595
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
13696
14596
|
return node;
|
13697
14597
|
}
|
13698
14598
|
}
|
@@ -13700,7 +14600,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13700
14600
|
switch (PM_NODE_TYPE(node)) {
|
13701
14601
|
case PM_BACK_REFERENCE_READ_NODE:
|
13702
14602
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13703
|
-
|
14603
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13704
14604
|
/* fallthrough */
|
13705
14605
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13706
14606
|
parser_lex(parser);
|
@@ -13763,7 +14663,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13763
14663
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13764
14664
|
|
13765
14665
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13766
|
-
|
14666
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13767
14667
|
}
|
13768
14668
|
|
13769
14669
|
parser_lex(parser);
|
@@ -13785,7 +14685,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13785
14685
|
}
|
13786
14686
|
case PM_MULTI_WRITE_NODE: {
|
13787
14687
|
parser_lex(parser);
|
13788
|
-
|
14688
|
+
pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
|
13789
14689
|
return node;
|
13790
14690
|
}
|
13791
14691
|
default:
|
@@ -13794,7 +14694,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13794
14694
|
// In this case we have an ||= sign, but we don't know what it's for.
|
13795
14695
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13796
14696
|
// and just skip right past it.
|
13797
|
-
|
14697
|
+
pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
13798
14698
|
return node;
|
13799
14699
|
}
|
13800
14700
|
}
|
@@ -13812,7 +14712,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13812
14712
|
switch (PM_NODE_TYPE(node)) {
|
13813
14713
|
case PM_BACK_REFERENCE_READ_NODE:
|
13814
14714
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13815
|
-
|
14715
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
13816
14716
|
/* fallthrough */
|
13817
14717
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13818
14718
|
parser_lex(parser);
|
@@ -13875,7 +14775,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13875
14775
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
|
13876
14776
|
|
13877
14777
|
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
|
13878
|
-
|
14778
|
+
pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
|
13879
14779
|
}
|
13880
14780
|
|
13881
14781
|
parser_lex(parser);
|
@@ -13897,7 +14797,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13897
14797
|
}
|
13898
14798
|
case PM_MULTI_WRITE_NODE: {
|
13899
14799
|
parser_lex(parser);
|
13900
|
-
|
14800
|
+
pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
|
13901
14801
|
return node;
|
13902
14802
|
}
|
13903
14803
|
default:
|
@@ -13906,7 +14806,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
13906
14806
|
// In this case we have an operator but we don't know what it's for.
|
13907
14807
|
// We need to treat it as an error. For now, we'll mark it as an error
|
13908
14808
|
// and just skip right past it.
|
13909
|
-
|
14809
|
+
pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
13910
14810
|
return node;
|
13911
14811
|
}
|
13912
14812
|
}
|
@@ -14021,7 +14921,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14021
14921
|
break;
|
14022
14922
|
}
|
14023
14923
|
default: {
|
14024
|
-
|
14924
|
+
pm_parser_err_current(parser, PM_ERR_DEF_NAME);
|
14025
14925
|
message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
|
14026
14926
|
}
|
14027
14927
|
}
|
@@ -14172,7 +15072,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14172
15072
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
14173
15073
|
}
|
14174
15074
|
default: {
|
14175
|
-
|
15075
|
+
pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
14176
15076
|
pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
|
14177
15077
|
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
14178
15078
|
}
|
@@ -14220,7 +15120,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
14220
15120
|
|
14221
15121
|
if (block != NULL) {
|
14222
15122
|
if (arguments.block != NULL) {
|
14223
|
-
|
15123
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
|
14224
15124
|
if (arguments.arguments == NULL) {
|
14225
15125
|
arguments.arguments = pm_arguments_node_create(parser);
|
14226
15126
|
}
|
@@ -14283,7 +15183,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
|
|
14283
15183
|
// parse_expression_prefix is going to be a missing node. In that case we need
|
14284
15184
|
// to add the error message to the parser's error list.
|
14285
15185
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
14286
|
-
|
15186
|
+
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
14287
15187
|
return node;
|
14288
15188
|
}
|
14289
15189
|
|
@@ -14428,6 +15328,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
|
|
14428
15328
|
.next_start = NULL,
|
14429
15329
|
.heredoc_end = NULL,
|
14430
15330
|
.comment_list = PM_LIST_EMPTY,
|
15331
|
+
.magic_comment_list = PM_LIST_EMPTY,
|
14431
15332
|
.warning_list = PM_LIST_EMPTY,
|
14432
15333
|
.error_list = PM_LIST_EMPTY,
|
14433
15334
|
.current_scope = NULL,
|
@@ -14441,6 +15342,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
|
|
14441
15342
|
.constant_pool = PM_CONSTANT_POOL_EMPTY,
|
14442
15343
|
.newline_list = PM_NEWLINE_LIST_EMPTY,
|
14443
15344
|
.integer_base = 0,
|
15345
|
+
.current_string = PM_EMPTY_STRING,
|
14444
15346
|
.command_start = true,
|
14445
15347
|
.recovering = false,
|
14446
15348
|
.encoding_changed = false,
|
@@ -14521,6 +15423,19 @@ pm_comment_list_free(pm_list_t *list) {
|
|
14521
15423
|
}
|
14522
15424
|
}
|
14523
15425
|
|
15426
|
+
// Free all of the memory associated with the magic comment list.
|
15427
|
+
static inline void
|
15428
|
+
pm_magic_comment_list_free(pm_list_t *list) {
|
15429
|
+
pm_list_node_t *node, *next;
|
15430
|
+
|
15431
|
+
for (node = list->head; node != NULL; node = next) {
|
15432
|
+
next = node->next;
|
15433
|
+
|
15434
|
+
pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
|
15435
|
+
free(magic_comment);
|
15436
|
+
}
|
15437
|
+
}
|
15438
|
+
|
14524
15439
|
// Free any memory associated with the given parser.
|
14525
15440
|
PRISM_EXPORTED_FUNCTION void
|
14526
15441
|
pm_parser_free(pm_parser_t *parser) {
|
@@ -14528,6 +15443,7 @@ pm_parser_free(pm_parser_t *parser) {
|
|
14528
15443
|
pm_diagnostic_list_free(&parser->error_list);
|
14529
15444
|
pm_diagnostic_list_free(&parser->warning_list);
|
14530
15445
|
pm_comment_list_free(&parser->comment_list);
|
15446
|
+
pm_magic_comment_list_free(&parser->magic_comment_list);
|
14531
15447
|
pm_constant_pool_free(&parser->constant_pool);
|
14532
15448
|
pm_newline_list_free(&parser->newline_list);
|
14533
15449
|
|
@@ -14578,10 +15494,11 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
|
|
14578
15494
|
pm_parser_free(&parser);
|
14579
15495
|
}
|
14580
15496
|
|
14581
|
-
#undef PM_LOCATION_NULL_VALUE
|
14582
|
-
#undef PM_LOCATION_TOKEN_VALUE
|
14583
|
-
#undef PM_LOCATION_NODE_VALUE
|
14584
|
-
#undef PM_LOCATION_NODE_BASE_VALUE
|
14585
15497
|
#undef PM_CASE_KEYWORD
|
14586
15498
|
#undef PM_CASE_OPERATOR
|
14587
15499
|
#undef PM_CASE_WRITABLE
|
15500
|
+
#undef PM_EMPTY_STRING
|
15501
|
+
#undef PM_LOCATION_NODE_BASE_VALUE
|
15502
|
+
#undef PM_LOCATION_NODE_VALUE
|
15503
|
+
#undef PM_LOCATION_NULL_VALUE
|
15504
|
+
#undef PM_LOCATION_TOKEN_VALUE
|