prism 0.28.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +1 -0
- data/config.yml +95 -26
- data/docs/fuzzing.md +1 -1
- data/docs/ripper_translation.md +22 -0
- data/ext/prism/api_node.c +70 -52
- data/ext/prism/extconf.rb +27 -23
- data/ext/prism/extension.c +107 -372
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +170 -102
- data/include/prism/diagnostic.h +18 -3
- data/include/prism/node.h +0 -21
- data/include/prism/parser.h +23 -25
- data/include/prism/regexp.h +17 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +16 -9
- data/include/prism/util/pm_string.h +0 -8
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +4 -4
- data/lib/prism/dispatcher.rb +14 -0
- data/lib/prism/dot_visitor.rb +54 -35
- data/lib/prism/dsl.rb +23 -18
- data/lib/prism/ffi.rb +25 -4
- data/lib/prism/inspect_visitor.rb +26 -24
- data/lib/prism/mutation_compiler.rb +6 -1
- data/lib/prism/node.rb +314 -389
- data/lib/prism/node_ext.rb +175 -17
- data/lib/prism/parse_result/comments.rb +1 -8
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +17 -0
- data/lib/prism/reflection.rb +11 -9
- data/lib/prism/serialize.rb +91 -68
- data/lib/prism/translation/parser/compiler.rb +288 -138
- data/lib/prism/translation/parser.rb +7 -2
- data/lib/prism/translation/ripper.rb +24 -22
- data/lib/prism/translation/ruby_parser.rb +32 -14
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +2 -4
- data/rbi/prism/node.rbi +114 -57
- data/rbi/prism/node_ext.rbi +5 -0
- data/rbi/prism/parse_result.rbi +1 -1
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +6 -0
- data/sig/prism/dsl.rbs +13 -10
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +72 -48
- data/sig/prism/node_ext.rbs +4 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +21 -0
- data/src/diagnostic.c +56 -27
- data/src/node.c +432 -1690
- data/src/prettyprint.c +97 -54
- data/src/prism.c +1286 -1196
- data/src/regexp.c +133 -68
- data/src/serialize.c +22 -17
- data/src/static_literals.c +63 -84
- data/src/token_type.c +4 -4
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +39 -11
- data/src/util/pm_string.c +0 -12
- data/src/util/pm_strpbrk.c +32 -6
- metadata +3 -5
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/src/util/pm_string_list.c +0 -28
data/src/prism.c
CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
|
|
423
423
|
* This is the equivalent of IS_lex_state in CRuby.
|
424
424
|
*/
|
425
425
|
static inline bool
|
426
|
-
lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
|
426
|
+
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
|
427
427
|
return parser->lex_state & state;
|
428
428
|
}
|
429
429
|
|
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
708
708
|
.previous = parser->current_scope,
|
709
709
|
.locals = { 0 },
|
710
710
|
.parameters = PM_SCOPE_PARAMETERS_NONE,
|
711
|
-
.
|
711
|
+
.implicit_parameters = { 0 },
|
712
712
|
.shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
|
713
713
|
.closed = closed
|
714
714
|
};
|
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
|
|
749
749
|
return scope;
|
750
750
|
}
|
751
751
|
|
752
|
-
|
753
|
-
|
752
|
+
typedef enum {
|
753
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
|
754
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
|
755
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
|
756
|
+
} pm_scope_forwarding_param_check_result_t;
|
757
|
+
|
758
|
+
static pm_scope_forwarding_param_check_result_t
|
759
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
|
754
760
|
pm_scope_t *scope = parser->current_scope;
|
755
|
-
|
761
|
+
bool conflict = false;
|
762
|
+
|
763
|
+
while (scope != NULL) {
|
756
764
|
if (scope->parameters & mask) {
|
757
|
-
if (
|
758
|
-
|
759
|
-
|
765
|
+
if (scope->closed) {
|
766
|
+
if (conflict) {
|
767
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
|
768
|
+
} else {
|
769
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
|
770
|
+
}
|
760
771
|
}
|
761
|
-
|
772
|
+
|
773
|
+
conflict = true;
|
762
774
|
}
|
775
|
+
|
763
776
|
if (scope->closed) break;
|
764
777
|
scope = scope->previous;
|
765
778
|
}
|
766
779
|
|
767
|
-
|
780
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
|
768
781
|
}
|
769
782
|
|
770
|
-
static
|
783
|
+
static void
|
771
784
|
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
|
772
|
-
pm_parser_scope_forwarding_param_check(parser,
|
785
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
|
786
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
787
|
+
// Pass.
|
788
|
+
break;
|
789
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
790
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
|
791
|
+
break;
|
792
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
793
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
|
794
|
+
break;
|
795
|
+
}
|
773
796
|
}
|
774
797
|
|
775
|
-
static
|
798
|
+
static void
|
776
799
|
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
|
777
|
-
pm_parser_scope_forwarding_param_check(parser,
|
800
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
|
801
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
802
|
+
// Pass.
|
803
|
+
break;
|
804
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
805
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
|
806
|
+
break;
|
807
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
808
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
809
|
+
break;
|
810
|
+
}
|
778
811
|
}
|
779
812
|
|
780
|
-
static
|
781
|
-
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *
|
782
|
-
pm_parser_scope_forwarding_param_check(parser,
|
813
|
+
static void
|
814
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
|
815
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
|
816
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
817
|
+
// Pass.
|
818
|
+
break;
|
819
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
820
|
+
// This shouldn't happen, because ... is not allowed in the
|
821
|
+
// declaration of blocks. If we get here, we assume we already have
|
822
|
+
// an error for this.
|
823
|
+
break;
|
824
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
825
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
826
|
+
break;
|
827
|
+
}
|
783
828
|
}
|
784
829
|
|
785
|
-
static
|
830
|
+
static void
|
786
831
|
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
|
787
|
-
pm_parser_scope_forwarding_param_check(parser,
|
832
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
|
833
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
834
|
+
// Pass.
|
835
|
+
break;
|
836
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
837
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
|
838
|
+
break;
|
839
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
840
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
|
841
|
+
break;
|
842
|
+
}
|
788
843
|
}
|
789
844
|
|
790
845
|
/**
|
@@ -1128,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
|
|
1128
1183
|
return NULL;
|
1129
1184
|
case PM_BEGIN_NODE: {
|
1130
1185
|
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
1186
|
+
|
1187
|
+
if (cast->statements == NULL && cast->ensure_clause != NULL) {
|
1188
|
+
node = (pm_node_t *) cast->ensure_clause;
|
1189
|
+
}
|
1190
|
+
else {
|
1191
|
+
if (cast->rescue_clause != NULL) {
|
1192
|
+
if (cast->rescue_clause->statements == NULL) {
|
1193
|
+
return NULL;
|
1194
|
+
}
|
1195
|
+
else if (cast->else_clause != NULL) {
|
1196
|
+
node = (pm_node_t *) cast->else_clause;
|
1197
|
+
}
|
1198
|
+
else {
|
1199
|
+
node = (pm_node_t *) cast->statements;
|
1200
|
+
}
|
1201
|
+
}
|
1202
|
+
else {
|
1203
|
+
node = (pm_node_t *) cast->statements;
|
1204
|
+
}
|
1205
|
+
}
|
1206
|
+
|
1207
|
+
break;
|
1208
|
+
}
|
1209
|
+
case PM_ENSURE_NODE: {
|
1210
|
+
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
|
1131
1211
|
node = (pm_node_t *) cast->statements;
|
1132
1212
|
break;
|
1133
1213
|
}
|
@@ -1575,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
|
|
1575
1655
|
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
|
1576
1656
|
}
|
1577
1657
|
|
1578
|
-
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
|
1658
|
+
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
|
1579
1659
|
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
|
1580
1660
|
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
|
1581
1661
|
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
|
@@ -1703,7 +1783,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1703
1783
|
* it's important that it be as fast as possible.
|
1704
1784
|
*/
|
1705
1785
|
static inline size_t
|
1706
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
1786
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1707
1787
|
if (parser->encoding_changed) {
|
1708
1788
|
size_t width;
|
1709
1789
|
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
@@ -2772,8 +2852,7 @@ static pm_call_node_t *
|
|
2772
2852
|
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
|
2773
2853
|
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
2774
2854
|
|
2775
|
-
node->base.location
|
2776
|
-
node->base.location.end = parser->start;
|
2855
|
+
node->base.location = PM_LOCATION_NULL_VALUE(parser);
|
2777
2856
|
node->arguments = arguments;
|
2778
2857
|
|
2779
2858
|
node->name = name;
|
@@ -3025,8 +3104,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3025
3104
|
.message_loc = target->message_loc,
|
3026
3105
|
.read_name = 0,
|
3027
3106
|
.write_name = target->name,
|
3028
|
-
.
|
3029
|
-
.
|
3107
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3108
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3030
3109
|
.value = value
|
3031
3110
|
};
|
3032
3111
|
|
@@ -3064,8 +3143,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3064
3143
|
.arguments = target->arguments,
|
3065
3144
|
.closing_loc = target->closing_loc,
|
3066
3145
|
.block = target->block,
|
3067
|
-
.
|
3068
|
-
.
|
3146
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3147
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3069
3148
|
.value = value
|
3070
3149
|
};
|
3071
3150
|
|
@@ -3409,9 +3488,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
|
|
3409
3488
|
},
|
3410
3489
|
.name = target->name,
|
3411
3490
|
.name_loc = target->base.location,
|
3412
|
-
.
|
3491
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3413
3492
|
.value = value,
|
3414
|
-
.
|
3493
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3415
3494
|
};
|
3416
3495
|
|
3417
3496
|
return node;
|
@@ -3525,9 +3604,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
|
|
3525
3604
|
}
|
3526
3605
|
},
|
3527
3606
|
.target = target,
|
3528
|
-
.
|
3607
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3529
3608
|
.value = value,
|
3530
|
-
.
|
3609
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3531
3610
|
};
|
3532
3611
|
|
3533
3612
|
return node;
|
@@ -3652,9 +3731,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
|
|
3652
3731
|
},
|
3653
3732
|
.name = target->name,
|
3654
3733
|
.name_loc = target->base.location,
|
3655
|
-
.
|
3734
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3656
3735
|
.value = value,
|
3657
|
-
.
|
3736
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3658
3737
|
};
|
3659
3738
|
|
3660
3739
|
return node;
|
@@ -4236,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4236
4315
|
}
|
4237
4316
|
|
4238
4317
|
/**
|
4239
|
-
* Allocate and initialize a new
|
4318
|
+
* Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
|
4240
4319
|
*/
|
4241
4320
|
static pm_rational_node_t *
|
4242
4321
|
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4246,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4246
4325
|
*node = (pm_rational_node_t) {
|
4247
4326
|
{
|
4248
4327
|
.type = PM_RATIONAL_NODE,
|
4249
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
4328
|
+
.flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
|
4250
4329
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4251
4330
|
},
|
4252
|
-
.
|
4253
|
-
|
4254
|
-
.start = token->start,
|
4255
|
-
.end = token->end - 1
|
4256
|
-
}))
|
4331
|
+
.numerator = { 0 },
|
4332
|
+
.denominator = { 0 }
|
4257
4333
|
};
|
4258
4334
|
|
4335
|
+
const uint8_t *start = token->start;
|
4336
|
+
const uint8_t *end = token->end - 1; // r
|
4337
|
+
|
4338
|
+
while (start < end && *start == '0') start++; // 0.1 -> .1
|
4339
|
+
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
|
4340
|
+
|
4341
|
+
size_t length = (size_t) (end - start);
|
4342
|
+
if (length == 1) {
|
4343
|
+
node->denominator.value = 1;
|
4344
|
+
return node;
|
4345
|
+
}
|
4346
|
+
|
4347
|
+
const uint8_t *point = memchr(start, '.', length);
|
4348
|
+
assert(point && "should have a decimal point");
|
4349
|
+
|
4350
|
+
uint8_t *digits = malloc(length);
|
4351
|
+
if (digits == NULL) {
|
4352
|
+
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
|
4353
|
+
abort();
|
4354
|
+
}
|
4355
|
+
|
4356
|
+
memcpy(digits, start, (unsigned long) (point - start));
|
4357
|
+
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
|
4358
|
+
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
|
4359
|
+
|
4360
|
+
digits[0] = '1';
|
4361
|
+
if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
|
4362
|
+
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
|
4363
|
+
free(digits);
|
4364
|
+
|
4365
|
+
pm_integers_reduce(&node->numerator, &node->denominator);
|
4259
4366
|
return node;
|
4260
4367
|
}
|
4261
4368
|
|
@@ -4505,9 +4612,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
|
|
4505
4612
|
},
|
4506
4613
|
.name = pm_global_variable_write_name(parser, target),
|
4507
4614
|
.name_loc = target->location,
|
4508
|
-
.
|
4615
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4509
4616
|
.value = value,
|
4510
|
-
.
|
4617
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4511
4618
|
};
|
4512
4619
|
|
4513
4620
|
return node;
|
@@ -4566,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
|
|
4566
4673
|
*node = (pm_global_variable_read_node_t) {
|
4567
4674
|
{
|
4568
4675
|
.type = PM_GLOBAL_VARIABLE_READ_NODE,
|
4569
|
-
.location =
|
4676
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4570
4677
|
},
|
4571
4678
|
.name = name
|
4572
4679
|
};
|
@@ -4608,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
|
|
4608
4715
|
*node = (pm_global_variable_write_node_t) {
|
4609
4716
|
{
|
4610
4717
|
.type = PM_GLOBAL_VARIABLE_WRITE_NODE,
|
4611
|
-
.location =
|
4718
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4612
4719
|
},
|
4613
4720
|
.name = name,
|
4614
|
-
.name_loc =
|
4615
|
-
.operator_loc =
|
4721
|
+
.name_loc = PM_LOCATION_NULL_VALUE(parser),
|
4722
|
+
.operator_loc = PM_LOCATION_NULL_VALUE(parser),
|
4616
4723
|
.value = value
|
4617
4724
|
};
|
4618
4725
|
|
@@ -4889,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
|
|
4889
4996
|
}
|
4890
4997
|
|
4891
4998
|
/**
|
4892
|
-
* Allocate and initialize a new
|
4999
|
+
* Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
|
4893
5000
|
* token.
|
4894
5001
|
*/
|
4895
5002
|
static pm_rational_node_t *
|
@@ -4900,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
|
|
4900
5007
|
*node = (pm_rational_node_t) {
|
4901
5008
|
{
|
4902
5009
|
.type = PM_RATIONAL_NODE,
|
4903
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5010
|
+
.flags = base | PM_NODE_FLAG_STATIC_LITERAL,
|
4904
5011
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4905
5012
|
},
|
4906
|
-
.
|
4907
|
-
|
4908
|
-
.start = token->start,
|
4909
|
-
.end = token->end - 1
|
4910
|
-
}))
|
5013
|
+
.numerator = { 0 },
|
5014
|
+
.denominator = { .value = 1, 0 }
|
4911
5015
|
};
|
4912
5016
|
|
5017
|
+
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
|
5018
|
+
switch (base) {
|
5019
|
+
case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
|
5020
|
+
case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
|
5021
|
+
case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
|
5022
|
+
case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
|
5023
|
+
default: assert(false && "unreachable"); break;
|
5024
|
+
}
|
5025
|
+
|
5026
|
+
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
|
5027
|
+
|
4913
5028
|
return node;
|
4914
5029
|
}
|
4915
5030
|
|
@@ -5013,9 +5128,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
|
|
5013
5128
|
},
|
5014
5129
|
.name = target->name,
|
5015
5130
|
.name_loc = target->base.location,
|
5016
|
-
.
|
5131
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5017
5132
|
.value = value,
|
5018
|
-
.
|
5133
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
5019
5134
|
};
|
5020
5135
|
|
5021
5136
|
return node;
|
@@ -5407,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
|
|
5407
5522
|
node->base.location.end = closing->end;
|
5408
5523
|
}
|
5409
5524
|
|
5525
|
+
/**
|
5526
|
+
* Create a local variable read that is reading the implicit 'it' variable.
|
5527
|
+
*/
|
5528
|
+
static pm_it_local_variable_read_node_t *
|
5529
|
+
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
5530
|
+
pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
|
5531
|
+
|
5532
|
+
*node = (pm_it_local_variable_read_node_t) {
|
5533
|
+
{
|
5534
|
+
.type = PM_IT_LOCAL_VARIABLE_READ_NODE,
|
5535
|
+
.location = PM_LOCATION_TOKEN_VALUE(name)
|
5536
|
+
}
|
5537
|
+
};
|
5538
|
+
|
5539
|
+
return node;
|
5540
|
+
}
|
5541
|
+
|
5410
5542
|
/**
|
5411
5543
|
* Allocate and initialize a new ItParametersNode node.
|
5412
5544
|
*/
|
@@ -5609,10 +5741,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5609
5741
|
}
|
5610
5742
|
},
|
5611
5743
|
.name_loc = target->location,
|
5612
|
-
.
|
5744
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5613
5745
|
.value = value,
|
5614
5746
|
.name = name,
|
5615
|
-
.
|
5747
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
5616
5748
|
.depth = depth
|
5617
5749
|
};
|
5618
5750
|
|
@@ -5719,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
|
5719
5851
|
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
5720
5852
|
}
|
5721
5853
|
|
5722
|
-
/**
|
5723
|
-
* Returns true if the given node is `it` default parameter.
|
5724
|
-
*/
|
5725
|
-
static inline bool
|
5726
|
-
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
5727
|
-
// Check if it's a local variable reference
|
5728
|
-
if (node->type != PM_CALL_NODE) {
|
5729
|
-
return false;
|
5730
|
-
}
|
5731
|
-
|
5732
|
-
// Check if it's a variable call
|
5733
|
-
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
5734
|
-
if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
|
5735
|
-
return false;
|
5736
|
-
}
|
5737
|
-
|
5738
|
-
// Check if it's called `it`
|
5739
|
-
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
5740
|
-
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
5741
|
-
return pm_token_is_it(constant->start, constant->start + constant->length);
|
5742
|
-
}
|
5743
|
-
|
5744
5854
|
/**
|
5745
5855
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
5746
5856
|
* are of the form /^_\d$/).
|
@@ -6891,7 +7001,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
|
|
6891
7001
|
case PM_REDO_NODE:
|
6892
7002
|
case PM_RETRY_NODE:
|
6893
7003
|
case PM_RETURN_NODE:
|
6894
|
-
pm_parser_warn_node(parser,
|
7004
|
+
pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
|
6895
7005
|
break;
|
6896
7006
|
default:
|
6897
7007
|
break;
|
@@ -7300,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
|
|
7300
7410
|
{
|
7301
7411
|
.type = PM_SYMBOL_NODE,
|
7302
7412
|
.flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
|
7303
|
-
.location =
|
7413
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7304
7414
|
},
|
7305
|
-
.value_loc =
|
7415
|
+
.value_loc = PM_LOCATION_NULL_VALUE(parser),
|
7306
7416
|
.unescaped = { 0 }
|
7307
7417
|
};
|
7308
7418
|
|
@@ -7703,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7703
7813
|
*node = (pm_while_node_t) {
|
7704
7814
|
{
|
7705
7815
|
.type = PM_WHILE_NODE,
|
7706
|
-
.location =
|
7816
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7707
7817
|
},
|
7708
|
-
.keyword_loc =
|
7709
|
-
.closing_loc =
|
7818
|
+
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7819
|
+
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7710
7820
|
.predicate = predicate,
|
7711
7821
|
.statements = statements
|
7712
7822
|
};
|
@@ -7861,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
|
|
7861
7971
|
return constant_id;
|
7862
7972
|
}
|
7863
7973
|
|
7864
|
-
/**
|
7865
|
-
* Create a local variable read that is reading the implicit 'it' variable.
|
7866
|
-
*/
|
7867
|
-
static pm_local_variable_read_node_t *
|
7868
|
-
pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
|
7869
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
|
7870
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
7871
|
-
return NULL;
|
7872
|
-
}
|
7873
|
-
|
7874
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
|
7875
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
7876
|
-
return NULL;
|
7877
|
-
}
|
7878
|
-
|
7879
|
-
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
|
7880
|
-
|
7881
|
-
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
7882
|
-
pm_parser_local_add(parser, name_id, name->start, name->end, 0);
|
7883
|
-
|
7884
|
-
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
|
7885
|
-
}
|
7886
|
-
|
7887
|
-
/**
|
7888
|
-
* Convert a `it` variable call node to a node for `it` default parameter.
|
7889
|
-
*/
|
7890
|
-
static pm_node_t *
|
7891
|
-
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7892
|
-
if (
|
7893
|
-
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7894
|
-
!parser->current_scope->closed &&
|
7895
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7896
|
-
pm_node_is_it(parser, node)
|
7897
|
-
) {
|
7898
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
7899
|
-
|
7900
|
-
if (read != NULL) {
|
7901
|
-
pm_node_destroy(parser, node);
|
7902
|
-
node = (pm_node_t *) read;
|
7903
|
-
}
|
7904
|
-
}
|
7905
|
-
|
7906
|
-
return node;
|
7907
|
-
}
|
7908
|
-
|
7909
7974
|
/**
|
7910
7975
|
* Add a parameter name to the current scope and check whether the name of the
|
7911
7976
|
* parameter is unique or not.
|
@@ -7941,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
7941
8006
|
pm_scope_t *scope = parser->current_scope;
|
7942
8007
|
parser->current_scope = scope->previous;
|
7943
8008
|
pm_locals_free(&scope->locals);
|
8009
|
+
pm_node_list_free(&scope->implicit_parameters);
|
7944
8010
|
xfree(scope);
|
7945
8011
|
}
|
7946
8012
|
|
@@ -8012,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
|
|
8012
8078
|
* is beyond the end of the source then return '\0'.
|
8013
8079
|
*/
|
8014
8080
|
static inline uint8_t
|
8015
|
-
peek_at(pm_parser_t *parser, const uint8_t *cursor) {
|
8081
|
+
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
|
8016
8082
|
if (cursor < parser->end) {
|
8017
8083
|
return *cursor;
|
8018
8084
|
} else {
|
@@ -8035,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
|
|
8035
8101
|
* that position is beyond the end of the source then return '\0'.
|
8036
8102
|
*/
|
8037
8103
|
static inline uint8_t
|
8038
|
-
peek(pm_parser_t *parser) {
|
8104
|
+
peek(const pm_parser_t *parser) {
|
8039
8105
|
return peek_at(parser, parser->current.end);
|
8040
8106
|
}
|
8041
8107
|
|
@@ -8100,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
8100
8166
|
return memchr(cursor, '\n', (size_t) length);
|
8101
8167
|
}
|
8102
8168
|
|
8169
|
+
/**
|
8170
|
+
* This is equivalent to the predicate of warn_balanced in CRuby.
|
8171
|
+
*/
|
8172
|
+
static inline bool
|
8173
|
+
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
|
8174
|
+
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
|
8175
|
+
}
|
8176
|
+
|
8103
8177
|
/**
|
8104
8178
|
* Here we're going to check if this is a "magic" comment, and perform whatever
|
8105
8179
|
* actions are necessary for it here.
|
@@ -8339,7 +8413,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|
8339
8413
|
// If we have hit a ractor pragma, attempt to lex that.
|
8340
8414
|
uint32_t value_length = (uint32_t) (value_end - value_start);
|
8341
8415
|
if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
|
8342
|
-
|
8416
|
+
const uint8_t *cursor = parser->current.start;
|
8417
|
+
while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
|
8418
|
+
|
8419
|
+
if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
|
8420
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
|
8421
|
+
} else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
|
8343
8422
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
|
8344
8423
|
} else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
|
8345
8424
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
|
@@ -8796,6 +8875,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8796
8875
|
type = lex_optional_float_suffix(parser, seen_e);
|
8797
8876
|
}
|
8798
8877
|
|
8878
|
+
// At this point we have a completed number, but we want to provide the user
|
8879
|
+
// with a good experience if they put an additional .xxx fractional
|
8880
|
+
// component on the end, so we'll check for that here.
|
8881
|
+
if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
|
8882
|
+
const uint8_t *fraction_start = parser->current.end;
|
8883
|
+
const uint8_t *fraction_end = parser->current.end + 2;
|
8884
|
+
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
|
8885
|
+
pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
|
8886
|
+
}
|
8887
|
+
|
8799
8888
|
return type;
|
8800
8889
|
}
|
8801
8890
|
|
@@ -8925,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8925
9014
|
// If we get here, then we have a $ followed by something that
|
8926
9015
|
// isn't recognized as a global variable.
|
8927
9016
|
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8928
|
-
|
8929
|
-
|
9017
|
+
const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9018
|
+
PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
|
8930
9019
|
}
|
8931
9020
|
|
8932
9021
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -9297,12 +9386,20 @@ escape_hexadecimal_digit(const uint8_t value) {
|
|
9297
9386
|
* validated.
|
9298
9387
|
*/
|
9299
9388
|
static inline uint32_t
|
9300
|
-
escape_unicode(const uint8_t *string, size_t length) {
|
9389
|
+
escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
9301
9390
|
uint32_t value = 0;
|
9302
9391
|
for (size_t index = 0; index < length; index++) {
|
9303
9392
|
if (index != 0) value <<= 4;
|
9304
9393
|
value |= escape_hexadecimal_digit(string[index]);
|
9305
9394
|
}
|
9395
|
+
|
9396
|
+
// Here we're going to verify that the value is actually a valid Unicode
|
9397
|
+
// codepoint and not a surrogate (U+D800 through U+DFFF).
|
9398
|
+
if (value >= 0xD800 && value <= 0xDFFF) {
|
9399
|
+
pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9400
|
+
return 0xFFFD;
|
9401
|
+
}
|
9402
|
+
|
9306
9403
|
return value;
|
9307
9404
|
}
|
9308
9405
|
|
@@ -9311,7 +9408,7 @@ escape_unicode(const uint8_t *string, size_t length) {
|
|
9311
9408
|
*/
|
9312
9409
|
static inline uint8_t
|
9313
9410
|
escape_byte(uint8_t value, const uint8_t flags) {
|
9314
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) value &=
|
9411
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
|
9315
9412
|
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
9316
9413
|
return value;
|
9317
9414
|
}
|
@@ -9411,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
|
9411
9508
|
static inline void
|
9412
9509
|
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
|
9413
9510
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9414
|
-
|
9415
|
-
|
9416
|
-
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
9417
|
-
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
9418
|
-
|
9419
|
-
if (byte1 >= 0xA) {
|
9420
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
9421
|
-
} else {
|
9422
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
|
9423
|
-
}
|
9424
|
-
|
9425
|
-
if (byte2 >= 0xA) {
|
9426
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
9427
|
-
} else {
|
9428
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
|
9429
|
-
}
|
9511
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
|
9430
9512
|
}
|
9431
9513
|
|
9432
9514
|
escape_write_byte_encoded(parser, buffer, byte);
|
@@ -9461,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9461
9543
|
switch (peek(parser)) {
|
9462
9544
|
case '\\': {
|
9463
9545
|
parser->current.end++;
|
9464
|
-
|
9546
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
9465
9547
|
return;
|
9466
9548
|
}
|
9467
9549
|
case '\'': {
|
9468
9550
|
parser->current.end++;
|
9469
|
-
|
9551
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
|
9470
9552
|
return;
|
9471
9553
|
}
|
9472
9554
|
case 'a': {
|
9473
9555
|
parser->current.end++;
|
9474
|
-
|
9556
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
|
9475
9557
|
return;
|
9476
9558
|
}
|
9477
9559
|
case 'b': {
|
9478
9560
|
parser->current.end++;
|
9479
|
-
|
9561
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
|
9480
9562
|
return;
|
9481
9563
|
}
|
9482
9564
|
case 'e': {
|
9483
9565
|
parser->current.end++;
|
9484
|
-
|
9566
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
|
9485
9567
|
return;
|
9486
9568
|
}
|
9487
9569
|
case 'f': {
|
9488
9570
|
parser->current.end++;
|
9489
|
-
|
9571
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
|
9490
9572
|
return;
|
9491
9573
|
}
|
9492
9574
|
case 'n': {
|
9493
9575
|
parser->current.end++;
|
9494
|
-
|
9576
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
|
9495
9577
|
return;
|
9496
9578
|
}
|
9497
9579
|
case 'r': {
|
9498
9580
|
parser->current.end++;
|
9499
|
-
|
9581
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
|
9500
9582
|
return;
|
9501
9583
|
}
|
9502
9584
|
case 's': {
|
9503
9585
|
parser->current.end++;
|
9504
|
-
|
9586
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
|
9505
9587
|
return;
|
9506
9588
|
}
|
9507
9589
|
case 't': {
|
9508
9590
|
parser->current.end++;
|
9509
|
-
|
9591
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
|
9510
9592
|
return;
|
9511
9593
|
}
|
9512
9594
|
case 'v': {
|
9513
9595
|
parser->current.end++;
|
9514
|
-
|
9596
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
|
9515
9597
|
return;
|
9516
9598
|
}
|
9517
9599
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
@@ -9528,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9528
9610
|
}
|
9529
9611
|
}
|
9530
9612
|
|
9531
|
-
|
9613
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9532
9614
|
return;
|
9533
9615
|
}
|
9534
9616
|
case 'x': {
|
@@ -9547,8 +9629,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9547
9629
|
parser->current.end++;
|
9548
9630
|
}
|
9549
9631
|
|
9632
|
+
value = escape_byte(value, flags);
|
9550
9633
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9551
|
-
|
9634
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9635
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
|
9636
|
+
} else {
|
9637
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9638
|
+
}
|
9552
9639
|
}
|
9553
9640
|
|
9554
9641
|
escape_write_byte_encoded(parser, buffer, value);
|
@@ -9580,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9580
9667
|
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
9581
9668
|
} else if (hexadecimal_length == 0) {
|
9582
9669
|
// there are no hexadecimal characters
|
9583
|
-
pm_parser_err(parser,
|
9670
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9671
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
9584
9672
|
return;
|
9585
9673
|
}
|
9586
9674
|
|
@@ -9590,7 +9678,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9590
9678
|
extra_codepoints_start = unicode_start;
|
9591
9679
|
}
|
9592
9680
|
|
9593
|
-
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
9681
|
+
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9594
9682
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9595
9683
|
|
9596
9684
|
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
@@ -9615,7 +9703,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9615
9703
|
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9616
9704
|
|
9617
9705
|
if (length == 4) {
|
9618
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9706
|
+
uint32_t value = escape_unicode(parser, parser->current.end, 4);
|
9619
9707
|
|
9620
9708
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9621
9709
|
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
@@ -9651,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9651
9739
|
return;
|
9652
9740
|
}
|
9653
9741
|
parser->current.end++;
|
9742
|
+
|
9743
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9744
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9745
|
+
return;
|
9746
|
+
}
|
9747
|
+
|
9654
9748
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9655
9749
|
return;
|
9656
9750
|
case ' ':
|
@@ -9678,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9678
9772
|
case 'C': {
|
9679
9773
|
parser->current.end++;
|
9680
9774
|
if (peek(parser) != '-') {
|
9681
|
-
|
9775
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9776
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9682
9777
|
return;
|
9683
9778
|
}
|
9684
9779
|
|
@@ -9701,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9701
9796
|
return;
|
9702
9797
|
}
|
9703
9798
|
parser->current.end++;
|
9799
|
+
|
9800
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9801
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9802
|
+
return;
|
9803
|
+
}
|
9804
|
+
|
9704
9805
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9705
9806
|
return;
|
9706
9807
|
case ' ':
|
@@ -9715,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9715
9816
|
return;
|
9716
9817
|
default: {
|
9717
9818
|
if (!char_is_ascii_printable(peeked)) {
|
9718
|
-
|
9819
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9820
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9719
9821
|
return;
|
9720
9822
|
}
|
9721
9823
|
|
@@ -9728,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9728
9830
|
case 'M': {
|
9729
9831
|
parser->current.end++;
|
9730
9832
|
if (peek(parser) != '-') {
|
9731
|
-
|
9833
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9834
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9732
9835
|
return;
|
9733
9836
|
}
|
9734
9837
|
|
@@ -9746,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9746
9849
|
return;
|
9747
9850
|
}
|
9748
9851
|
parser->current.end++;
|
9852
|
+
|
9853
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9854
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9855
|
+
return;
|
9856
|
+
}
|
9857
|
+
|
9749
9858
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
|
9750
9859
|
return;
|
9751
9860
|
case ' ':
|
@@ -9760,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9760
9869
|
return;
|
9761
9870
|
default:
|
9762
9871
|
if (!char_is_ascii_printable(peeked)) {
|
9763
|
-
|
9872
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9873
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9764
9874
|
return;
|
9765
9875
|
}
|
9766
9876
|
|
@@ -10721,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10721
10831
|
type = PM_TOKEN_USTAR_STAR;
|
10722
10832
|
} else if (lex_state_beg_p(parser)) {
|
10723
10833
|
type = PM_TOKEN_USTAR_STAR;
|
10834
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10835
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
|
10724
10836
|
}
|
10725
10837
|
|
10726
10838
|
if (lex_state_operator_p(parser)) {
|
@@ -10744,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10744
10856
|
type = PM_TOKEN_USTAR;
|
10745
10857
|
} else if (lex_state_beg_p(parser)) {
|
10746
10858
|
type = PM_TOKEN_USTAR;
|
10859
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10860
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
|
10747
10861
|
}
|
10748
10862
|
|
10749
10863
|
if (lex_state_operator_p(parser)) {
|
@@ -10860,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10860
10974
|
// If we have quotes, then we're going to go until we find the
|
10861
10975
|
// end quote.
|
10862
10976
|
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
|
10977
|
+
if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
|
10863
10978
|
parser->current.end++;
|
10864
10979
|
}
|
10865
10980
|
}
|
@@ -10917,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10917
11032
|
LEX(PM_TOKEN_LESS_LESS_EQUAL);
|
10918
11033
|
}
|
10919
11034
|
|
11035
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11036
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
|
11037
|
+
}
|
11038
|
+
|
10920
11039
|
if (lex_state_operator_p(parser)) {
|
10921
11040
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
10922
11041
|
} else {
|
@@ -11030,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
|
|
11030
11149
|
type = PM_TOKEN_UAMPERSAND;
|
11031
11150
|
} else if (lex_state_beg_p(parser)) {
|
11032
11151
|
type = PM_TOKEN_UAMPERSAND;
|
11152
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
11153
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
|
11033
11154
|
}
|
11034
11155
|
|
11035
11156
|
if (lex_state_operator_p(parser)) {
|
@@ -11104,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11104
11225
|
LEX(PM_TOKEN_UPLUS);
|
11105
11226
|
}
|
11106
11227
|
|
11228
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11229
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
|
11230
|
+
}
|
11231
|
+
|
11107
11232
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11108
11233
|
LEX(PM_TOKEN_PLUS);
|
11109
11234
|
}
|
@@ -11141,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11141
11266
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
11142
11267
|
}
|
11143
11268
|
|
11269
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11270
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
|
11271
|
+
}
|
11272
|
+
|
11144
11273
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11145
11274
|
LEX(PM_TOKEN_MINUS);
|
11146
11275
|
}
|
@@ -11239,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11239
11368
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
11240
11369
|
}
|
11241
11370
|
|
11371
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11372
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
|
11373
|
+
}
|
11374
|
+
|
11242
11375
|
if (lex_state_operator_p(parser)) {
|
11243
11376
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11244
11377
|
} else {
|
@@ -11274,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11274
11407
|
// operator because we don't want to move into the string
|
11275
11408
|
// lex mode unnecessarily.
|
11276
11409
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
11277
|
-
pm_parser_err_current(parser,
|
11410
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
|
11278
11411
|
LEX(PM_TOKEN_PERCENT);
|
11279
11412
|
}
|
11280
11413
|
|
@@ -11293,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11293
11426
|
|
11294
11427
|
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
11295
11428
|
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
11296
|
-
|
11297
|
-
if (parser->current.end < parser->end) {
|
11298
|
-
LEX(PM_TOKEN_STRING_BEGIN);
|
11299
|
-
}
|
11429
|
+
LEX(PM_TOKEN_STRING_BEGIN);
|
11300
11430
|
}
|
11301
11431
|
|
11302
11432
|
// Delimiters for %-literals cannot be alphanumeric. We
|
@@ -11423,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11423
11553
|
}
|
11424
11554
|
}
|
11425
11555
|
|
11556
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11557
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
|
11558
|
+
}
|
11559
|
+
|
11426
11560
|
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
|
11427
11561
|
LEX(PM_TOKEN_PERCENT);
|
11428
11562
|
}
|
@@ -12233,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
|
|
12233
12367
|
|
12234
12368
|
// If we are immediately following a newline and we have hit the
|
12235
12369
|
// terminator, then we need to return the ending of the heredoc.
|
12236
|
-
if (
|
12370
|
+
if (current_token_starts_line(parser)) {
|
12237
12371
|
const uint8_t *start = parser->current.start;
|
12238
|
-
|
12372
|
+
|
12373
|
+
if (!line_continuation && (start + ident_length <= parser->end)) {
|
12239
12374
|
const uint8_t *newline = next_newline(start, parser->end - start);
|
12240
12375
|
const uint8_t *ident_end = newline;
|
12241
12376
|
const uint8_t *terminator_end = newline;
|
@@ -12391,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
|
|
12391
12526
|
}
|
12392
12527
|
|
12393
12528
|
parser->current.end = breakpoint + 1;
|
12394
|
-
|
12395
|
-
|
12396
|
-
pm_token_buffer_flush(parser, &token_buffer);
|
12397
|
-
LEX(PM_TOKEN_STRING_CONTENT);
|
12398
|
-
}
|
12529
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
12530
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
12399
12531
|
}
|
12400
12532
|
|
12401
12533
|
// Otherwise we hit a newline and it wasn't followed by
|
@@ -13030,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
|
13030
13162
|
return (pm_node_t *) result;
|
13031
13163
|
}
|
13032
13164
|
|
13165
|
+
/**
|
13166
|
+
* When an implicit local variable is written to or targeted, it becomes a
|
13167
|
+
* regular, named local variable. This function removes it from the list of
|
13168
|
+
* implicit parameters when that happens.
|
13169
|
+
*/
|
13170
|
+
static void
|
13171
|
+
parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
|
13172
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
13173
|
+
|
13174
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
13175
|
+
if (implicit_parameters->nodes[index] == node) {
|
13176
|
+
// If the node is not the last one in the list, we need to shift the
|
13177
|
+
// remaining nodes down to fill the gap. This is extremely unlikely
|
13178
|
+
// to happen.
|
13179
|
+
if (index != implicit_parameters->size - 1) {
|
13180
|
+
memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
|
13181
|
+
}
|
13182
|
+
|
13183
|
+
implicit_parameters->size--;
|
13184
|
+
break;
|
13185
|
+
}
|
13186
|
+
}
|
13187
|
+
}
|
13188
|
+
|
13033
13189
|
/**
|
13034
13190
|
* Convert the given node into a valid target node.
|
13191
|
+
*
|
13192
|
+
* @param multiple Whether or not this target is part of a larger set of
|
13193
|
+
* targets. If it is, then the &. operator is not allowed.
|
13194
|
+
* @param splat_parent Whether or not this target is a child of a splat target. If it
|
13195
|
+
* is, then fewer patterns are allowed.
|
13035
13196
|
*/
|
13036
13197
|
static pm_node_t *
|
13037
|
-
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13198
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
|
13038
13199
|
switch (PM_NODE_TYPE(target)) {
|
13039
13200
|
case PM_MISSING_NODE:
|
13040
13201
|
return target;
|
@@ -13080,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13080
13241
|
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
|
13081
13242
|
return target;
|
13082
13243
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13083
|
-
|
13244
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13245
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
|
13246
|
+
parse_target_implicit_parameter(parser, target);
|
13247
|
+
}
|
13084
13248
|
|
13085
13249
|
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
|
13086
13250
|
uint32_t name = cast->name;
|
@@ -13092,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13092
13256
|
|
13093
13257
|
return target;
|
13094
13258
|
}
|
13259
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13260
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13261
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13262
|
+
|
13263
|
+
parse_target_implicit_parameter(parser, target);
|
13264
|
+
pm_node_destroy(parser, target);
|
13265
|
+
|
13266
|
+
return node;
|
13267
|
+
}
|
13095
13268
|
case PM_INSTANCE_VARIABLE_READ_NODE:
|
13096
13269
|
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
|
13097
13270
|
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
|
13098
13271
|
return target;
|
13099
13272
|
case PM_MULTI_TARGET_NODE:
|
13273
|
+
if (splat_parent) {
|
13274
|
+
// Multi target is not accepted in all positions. If this is one
|
13275
|
+
// of them, then we need to add an error.
|
13276
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
13277
|
+
}
|
13278
|
+
|
13100
13279
|
return target;
|
13101
13280
|
case PM_SPLAT_NODE: {
|
13102
13281
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
13103
13282
|
|
13104
13283
|
if (splat->expression != NULL) {
|
13105
|
-
splat->expression = parse_target(parser, splat->expression, multiple);
|
13284
|
+
splat->expression = parse_target(parser, splat->expression, multiple, true);
|
13106
13285
|
}
|
13107
13286
|
|
13108
13287
|
return (pm_node_t *) splat;
|
@@ -13172,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13172
13351
|
*/
|
13173
13352
|
static pm_node_t *
|
13174
13353
|
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13175
|
-
pm_node_t *result = parse_target(parser, target, multiple);
|
13354
|
+
pm_node_t *result = parse_target(parser, target, multiple, false);
|
13176
13355
|
|
13177
|
-
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13356
|
+
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13357
|
+
// parens after the targets.
|
13178
13358
|
if (
|
13179
13359
|
!match1(parser, PM_TOKEN_EQUAL) &&
|
13180
13360
|
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
|
@@ -13244,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13244
13424
|
return (pm_node_t *) node;
|
13245
13425
|
}
|
13246
13426
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13247
|
-
pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
|
13248
13427
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
13249
13428
|
|
13250
13429
|
pm_constant_id_t name = local_read->name;
|
13430
|
+
pm_location_t name_loc = target->location;
|
13431
|
+
|
13251
13432
|
uint32_t depth = local_read->depth;
|
13252
|
-
|
13433
|
+
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
|
13253
13434
|
|
13254
|
-
|
13435
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13436
|
+
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
|
13437
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
|
13438
|
+
parse_target_implicit_parameter(parser, target);
|
13439
|
+
}
|
13440
|
+
|
13441
|
+
pm_locals_unread(&scope->locals, name);
|
13255
13442
|
pm_node_destroy(parser, target);
|
13256
13443
|
|
13257
13444
|
return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
|
13258
13445
|
}
|
13446
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13447
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13448
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
|
13449
|
+
|
13450
|
+
parse_target_implicit_parameter(parser, target);
|
13451
|
+
pm_node_destroy(parser, target);
|
13452
|
+
|
13453
|
+
return node;
|
13454
|
+
}
|
13259
13455
|
case PM_INSTANCE_VARIABLE_READ_NODE: {
|
13260
13456
|
pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
|
13261
13457
|
pm_node_destroy(parser, target);
|
@@ -13409,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13409
13605
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13410
13606
|
|
13411
13607
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13412
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13608
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
|
13413
13609
|
|
13414
13610
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13415
13611
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13425,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13425
13621
|
|
13426
13622
|
if (token_begins_expression_p(parser->current.type)) {
|
13427
13623
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13428
|
-
name = parse_target(parser, name, true);
|
13624
|
+
name = parse_target(parser, name, true, true);
|
13429
13625
|
}
|
13430
13626
|
|
13431
13627
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13433,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13433
13629
|
has_rest = true;
|
13434
13630
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13435
13631
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13436
|
-
target = parse_target(parser, target, true);
|
13632
|
+
target = parse_target(parser, target, true, false);
|
13437
13633
|
|
13438
13634
|
pm_multi_target_node_targets_append(parser, result, target);
|
13439
13635
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13470,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
13470
13666
|
*/
|
13471
13667
|
static pm_statements_node_t *
|
13472
13668
|
parse_statements(pm_parser_t *parser, pm_context_t context) {
|
13473
|
-
// First, skip past any optional terminators that might be at the beginning
|
13474
|
-
// the statements.
|
13669
|
+
// First, skip past any optional terminators that might be at the beginning
|
13670
|
+
// of the statements.
|
13475
13671
|
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
|
13476
13672
|
|
13477
13673
|
// If we have a terminator, then we can just return NULL.
|
@@ -13487,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13487
13683
|
pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
|
13488
13684
|
pm_statements_node_body_append(parser, statements, node);
|
13489
13685
|
|
13490
|
-
// If we're recovering from a syntax error, then we need to stop parsing
|
13491
|
-
// statements now.
|
13686
|
+
// If we're recovering from a syntax error, then we need to stop parsing
|
13687
|
+
// the statements now.
|
13492
13688
|
if (parser->recovering) {
|
13493
|
-
// If this is the level of context where the recovery has happened,
|
13494
|
-
// we can mark the parser as done recovering.
|
13689
|
+
// If this is the level of context where the recovery has happened,
|
13690
|
+
// then we can mark the parser as done recovering.
|
13495
13691
|
if (context_terminator(context, &parser->current)) parser->recovering = false;
|
13496
13692
|
break;
|
13497
13693
|
}
|
13498
13694
|
|
13499
|
-
// If we have a terminator, then we will parse all consecutive
|
13500
|
-
// and then continue parsing the statements list.
|
13695
|
+
// If we have a terminator, then we will parse all consecutive
|
13696
|
+
// terminators and then continue parsing the statements list.
|
13501
13697
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13502
|
-
// If we have a terminator, then we will continue parsing the
|
13503
|
-
// list.
|
13698
|
+
// If we have a terminator, then we will continue parsing the
|
13699
|
+
// statements list.
|
13504
13700
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13505
13701
|
if (context_terminator(context, &parser->current)) break;
|
13506
13702
|
|
@@ -13508,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13508
13704
|
continue;
|
13509
13705
|
}
|
13510
13706
|
|
13511
|
-
// At this point we have a list of statements that are not terminated by
|
13512
|
-
// newline or semicolon. At this point we need to check if we're at
|
13513
|
-
// of the statements list. If we are, then we should break out
|
13707
|
+
// At this point we have a list of statements that are not terminated by
|
13708
|
+
// a newline or semicolon. At this point we need to check if we're at
|
13709
|
+
// the end of the statements list. If we are, then we should break out
|
13710
|
+
// of the loop.
|
13514
13711
|
if (context_terminator(context, &parser->current)) break;
|
13515
13712
|
|
13516
13713
|
// At this point, we have a syntax error, because the statement was not
|
13517
13714
|
// terminated by a newline or semicolon, and we're not at the end of the
|
13518
|
-
// statements list. Ideally we should scan forward to determine if we
|
13519
|
-
// insert a missing terminator or break out of parsing the
|
13520
|
-
// at this point.
|
13715
|
+
// statements list. Ideally we should scan forward to determine if we
|
13716
|
+
// should insert a missing terminator or break out of parsing the
|
13717
|
+
// statements list at this point.
|
13521
13718
|
//
|
13522
|
-
// We don't have that yet, so instead we'll do a more naive approach. If
|
13523
|
-
// were unable to parse an expression, then we will skip past this
|
13524
|
-
// continue parsing the statements list. Otherwise we'll add
|
13525
|
-
// continue parsing the statements list.
|
13719
|
+
// We don't have that yet, so instead we'll do a more naive approach. If
|
13720
|
+
// we were unable to parse an expression, then we will skip past this
|
13721
|
+
// token and continue parsing the statements list. Otherwise we'll add
|
13722
|
+
// an error and continue parsing the statements list.
|
13526
13723
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13527
13724
|
parser_lex(parser);
|
13528
13725
|
|
13529
13726
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13530
13727
|
if (context_terminator(context, &parser->current)) break;
|
13531
|
-
} else if (!
|
13728
|
+
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
13532
13729
|
// This is an inlined version of accept1 because the error that we
|
13533
13730
|
// want to add has varargs. If this happens again, we should
|
13534
13731
|
// probably extract a helper function.
|
@@ -13550,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13550
13747
|
*/
|
13551
13748
|
static void
|
13552
13749
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13553
|
-
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13750
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
|
13554
13751
|
|
13555
13752
|
if (duplicated != NULL) {
|
13556
13753
|
pm_buffer_t buffer = { 0 };
|
@@ -13576,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13576
13773
|
*/
|
13577
13774
|
static void
|
13578
13775
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13579
|
-
|
13776
|
+
pm_node_t *previous;
|
13777
|
+
|
13778
|
+
if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
|
13580
13779
|
pm_diagnostic_list_append_format(
|
13581
13780
|
&parser->warning_list,
|
13582
13781
|
node->location.start,
|
13583
13782
|
node->location.end,
|
13584
13783
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
13585
|
-
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
|
13784
|
+
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
|
13785
|
+
pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
|
13586
13786
|
);
|
13587
13787
|
}
|
13588
13788
|
}
|
@@ -14036,31 +14236,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
|
|
14036
14236
|
* Check if current parameter follows valid parameters ordering. If not it adds
|
14037
14237
|
* an error to the list without stopping the parsing, otherwise sets the
|
14038
14238
|
* parameters state to the one corresponding to the current parameter.
|
14239
|
+
*
|
14240
|
+
* It returns true if it was successful, and false otherwise.
|
14039
14241
|
*/
|
14040
|
-
static
|
14242
|
+
static bool
|
14041
14243
|
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
|
14042
14244
|
pm_parameters_order_t state = parameters_ordering[token->type];
|
14043
|
-
if (state == PM_PARAMETERS_NO_CHANGE) return;
|
14245
|
+
if (state == PM_PARAMETERS_NO_CHANGE) return true;
|
14044
14246
|
|
14045
14247
|
// If we see another ordered argument after a optional argument
|
14046
14248
|
// we only continue parsing ordered arguments until we stop seeing ordered arguments.
|
14047
14249
|
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
14048
14250
|
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
|
14049
|
-
return;
|
14251
|
+
return true;
|
14050
14252
|
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
14051
|
-
return;
|
14253
|
+
return true;
|
14052
14254
|
}
|
14053
14255
|
|
14054
14256
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
14055
14257
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
14056
|
-
|
14057
|
-
|
14058
|
-
|
14258
|
+
return false;
|
14259
|
+
} else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
|
14260
|
+
pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
|
14261
|
+
return false;
|
14262
|
+
} else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
14059
14263
|
// We know what transition we failed on, so we can provide a better error here.
|
14060
14264
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
14061
|
-
|
14062
|
-
*current = state;
|
14265
|
+
return false;
|
14063
14266
|
}
|
14267
|
+
|
14268
|
+
if (state < *current) *current = state;
|
14269
|
+
return true;
|
14064
14270
|
}
|
14065
14271
|
|
14066
14272
|
/**
|
@@ -14129,27 +14335,22 @@ parse_parameters(
|
|
14129
14335
|
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
14130
14336
|
}
|
14131
14337
|
|
14132
|
-
|
14133
|
-
|
14134
|
-
parser_lex(parser);
|
14338
|
+
bool succeeded = update_parameter_state(parser, &parser->current, &order);
|
14339
|
+
parser_lex(parser);
|
14135
14340
|
|
14136
|
-
|
14341
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
|
14342
|
+
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
14137
14343
|
|
14138
|
-
|
14139
|
-
|
14140
|
-
|
14141
|
-
|
14142
|
-
|
14143
|
-
|
14144
|
-
|
14145
|
-
params->keyword_rest = NULL;
|
14146
|
-
}
|
14147
|
-
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
14148
|
-
} else {
|
14149
|
-
update_parameter_state(parser, &parser->current, &order);
|
14150
|
-
parser_lex(parser);
|
14344
|
+
if (params->keyword_rest != NULL) {
|
14345
|
+
// If we already have a keyword rest parameter, then we replace it with the
|
14346
|
+
// forwarding parameter and move the keyword rest parameter to the posts list.
|
14347
|
+
pm_node_t *keyword_rest = params->keyword_rest;
|
14348
|
+
pm_parameters_node_posts_append(params, keyword_rest);
|
14349
|
+
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
14350
|
+
params->keyword_rest = NULL;
|
14151
14351
|
}
|
14152
14352
|
|
14353
|
+
pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
|
14153
14354
|
break;
|
14154
14355
|
}
|
14155
14356
|
case PM_TOKEN_CLASS_VARIABLE:
|
@@ -14193,7 +14394,7 @@ parse_parameters(
|
|
14193
14394
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14194
14395
|
|
14195
14396
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
|
14196
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14397
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14197
14398
|
|
14198
14399
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
14199
14400
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
@@ -14206,7 +14407,7 @@ parse_parameters(
|
|
14206
14407
|
// If the value of the parameter increased the number of
|
14207
14408
|
// reads of that parameter, then we need to warn that we
|
14208
14409
|
// have a circular definition.
|
14209
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14410
|
+
if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14210
14411
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
14211
14412
|
}
|
14212
14413
|
|
@@ -14244,6 +14445,12 @@ parse_parameters(
|
|
14244
14445
|
pm_token_t local = name;
|
14245
14446
|
local.end -= 1;
|
14246
14447
|
|
14448
|
+
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
|
14449
|
+
pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
14450
|
+
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
|
14451
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
|
14452
|
+
}
|
14453
|
+
|
14247
14454
|
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
14248
14455
|
pm_parser_local_add_token(parser, &local, 1);
|
14249
14456
|
|
@@ -14279,10 +14486,10 @@ parse_parameters(
|
|
14279
14486
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14280
14487
|
|
14281
14488
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
|
14282
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14489
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14283
14490
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
14284
14491
|
|
14285
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14492
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14286
14493
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
|
14287
14494
|
}
|
14288
14495
|
|
@@ -14454,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14454
14661
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14455
14662
|
|
14456
14663
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14457
|
-
reference = parse_target(parser, reference, false);
|
14664
|
+
reference = parse_target(parser, reference, false, false);
|
14458
14665
|
|
14459
14666
|
pm_rescue_node_reference_set(rescue, reference);
|
14460
14667
|
break;
|
@@ -14484,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14484
14691
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14485
14692
|
|
14486
14693
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14487
|
-
reference = parse_target(parser, reference, false);
|
14694
|
+
reference = parse_target(parser, reference, false, false);
|
14488
14695
|
|
14489
14696
|
pm_rescue_node_reference_set(rescue, reference);
|
14490
14697
|
break;
|
@@ -14689,6 +14896,28 @@ parse_block_parameters(
|
|
14689
14896
|
return block_parameters;
|
14690
14897
|
}
|
14691
14898
|
|
14899
|
+
/**
|
14900
|
+
* Return true if any of the visible scopes to the current context are using
|
14901
|
+
* numbered parameters.
|
14902
|
+
*/
|
14903
|
+
static bool
|
14904
|
+
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
14905
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14906
|
+
if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
|
14907
|
+
}
|
14908
|
+
|
14909
|
+
return false;
|
14910
|
+
}
|
14911
|
+
|
14912
|
+
/**
|
14913
|
+
* These are the names of the various numbered parameters. We have them here so
|
14914
|
+
* that when we insert them into the constant pool we can use a constant string
|
14915
|
+
* and not have to allocate.
|
14916
|
+
*/
|
14917
|
+
static const char * const pm_numbered_parameter_names[] = {
|
14918
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
14919
|
+
};
|
14920
|
+
|
14692
14921
|
/**
|
14693
14922
|
* Return the node that should be used in the parameters field of a block-like
|
14694
14923
|
* (block or lambda) node, depending on the kind of parameters that were
|
@@ -14696,31 +14925,79 @@ parse_block_parameters(
|
|
14696
14925
|
*/
|
14697
14926
|
static pm_node_t *
|
14698
14927
|
parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
|
14699
|
-
|
14928
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
14929
|
+
|
14930
|
+
// If we have ordinary parameters, then we will return them as the set of
|
14931
|
+
// parameters.
|
14932
|
+
if (parameters != NULL) {
|
14933
|
+
// If we also have implicit parameters, then this is an error.
|
14934
|
+
if (implicit_parameters->size > 0) {
|
14935
|
+
pm_node_t *node = implicit_parameters->nodes[0];
|
14936
|
+
|
14937
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14938
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
|
14939
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14940
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
14941
|
+
} else {
|
14942
|
+
assert(false && "unreachable");
|
14943
|
+
}
|
14944
|
+
}
|
14700
14945
|
|
14701
|
-
if (masked == PM_SCOPE_PARAMETERS_NONE) {
|
14702
|
-
assert(parameters == NULL);
|
14703
|
-
return NULL;
|
14704
|
-
} else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
|
14705
|
-
assert(parameters != NULL);
|
14706
14946
|
return parameters;
|
14707
|
-
}
|
14708
|
-
assert(parameters == NULL);
|
14947
|
+
}
|
14709
14948
|
|
14710
|
-
|
14711
|
-
|
14712
|
-
|
14713
|
-
|
14949
|
+
// If we don't have any implicit parameters, then the set of parameters is
|
14950
|
+
// NULL.
|
14951
|
+
if (implicit_parameters->size == 0) {
|
14952
|
+
return NULL;
|
14953
|
+
}
|
14954
|
+
|
14955
|
+
// If we don't have ordinary parameters, then we now must validate our set
|
14956
|
+
// of implicit parameters. We can only have numbered parameters or it, but
|
14957
|
+
// they cannot be mixed.
|
14958
|
+
uint8_t numbered_parameter = 0;
|
14959
|
+
bool it_parameter = false;
|
14960
|
+
|
14961
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
14962
|
+
pm_node_t *node = implicit_parameters->nodes[index];
|
14963
|
+
|
14964
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14965
|
+
if (it_parameter) {
|
14966
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
|
14967
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
14968
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
|
14969
|
+
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
|
14970
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
|
14971
|
+
} else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
14972
|
+
numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
|
14973
|
+
} else {
|
14974
|
+
assert(false && "unreachable");
|
14975
|
+
}
|
14976
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14977
|
+
if (numbered_parameter > 0) {
|
14978
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
14979
|
+
} else {
|
14980
|
+
it_parameter = true;
|
14981
|
+
}
|
14714
14982
|
}
|
14983
|
+
}
|
14715
14984
|
|
14716
|
-
|
14717
|
-
|
14718
|
-
|
14985
|
+
if (numbered_parameter > 0) {
|
14986
|
+
// Go through the parent scopes and mark them as being disallowed from
|
14987
|
+
// using numbered parameters because this inner scope is using them.
|
14988
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14989
|
+
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
|
14990
|
+
}
|
14991
|
+
|
14992
|
+
const pm_location_t location = { .start = opening->start, .end = closing->end };
|
14993
|
+
return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
|
14994
|
+
}
|
14995
|
+
|
14996
|
+
if (it_parameter) {
|
14719
14997
|
return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
|
14720
|
-
} else {
|
14721
|
-
assert(false && "unreachable");
|
14722
|
-
return NULL;
|
14723
14998
|
}
|
14999
|
+
|
15000
|
+
return NULL;
|
14724
15001
|
}
|
14725
15002
|
|
14726
15003
|
/**
|
@@ -14737,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
|
|
14737
15014
|
pm_block_parameters_node_t *block_parameters = NULL;
|
14738
15015
|
|
14739
15016
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
14740
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
14741
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
14742
|
-
|
14743
15017
|
pm_token_t block_parameters_opening = parser->previous;
|
14744
15018
|
if (match1(parser, PM_TOKEN_PIPE)) {
|
14745
15019
|
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
|
@@ -14808,7 +15082,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14808
15082
|
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
|
14809
15083
|
} else {
|
14810
15084
|
pm_accepts_block_stack_push(parser, true);
|
14811
|
-
parse_arguments(parser, arguments,
|
15085
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
|
14812
15086
|
|
14813
15087
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14814
15088
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
|
@@ -14826,7 +15100,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14826
15100
|
// If we get here, then the subsequent token cannot be used as an infix
|
14827
15101
|
// operator. In this case we assume the subsequent token is part of an
|
14828
15102
|
// argument to this method call.
|
14829
|
-
parse_arguments(parser, arguments,
|
15103
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
|
14830
15104
|
|
14831
15105
|
// If we have done with the arguments and still not consumed the comma,
|
14832
15106
|
// then we have a trailing comma where we need to check whether it is
|
@@ -14857,11 +15131,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14857
15131
|
if (arguments->block == NULL && !arguments->has_forwarding) {
|
14858
15132
|
arguments->block = (pm_node_t *) block;
|
14859
15133
|
} else {
|
14860
|
-
|
14861
|
-
|
14862
|
-
} else {
|
14863
|
-
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14864
|
-
}
|
15134
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
15135
|
+
|
14865
15136
|
if (arguments->block != NULL) {
|
14866
15137
|
if (arguments->arguments == NULL) {
|
14867
15138
|
arguments->arguments = pm_arguments_node_create(parser);
|
@@ -15240,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
|
|
15240
15511
|
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
|
15241
15512
|
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
|
15242
15513
|
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
|
15243
|
-
case PM_NUMBERED_REFERENCE_READ_NODE
|
15514
|
+
case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
|
15244
15515
|
|
15245
15516
|
// Assert here that the flags are the same so that we can safely switch the type
|
15246
15517
|
// of the node without having to move the flags.
|
@@ -15298,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15298
15569
|
// "aaa #{bbb} #@ccc ddd"
|
15299
15570
|
// ^^^^^^
|
15300
15571
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
15572
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15573
|
+
// even though it is known at parse time.
|
15574
|
+
parser->explicit_encoding = NULL;
|
15575
|
+
|
15301
15576
|
pm_lex_state_t state = parser->lex_state;
|
15302
15577
|
int brace_nesting = parser->brace_nesting;
|
15303
15578
|
|
@@ -15320,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
|
|
15320
15595
|
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
|
15321
15596
|
pm_token_t closing = parser->previous;
|
15322
15597
|
|
15598
|
+
// If this set of embedded statements only contains a single
|
15599
|
+
// statement, then Ruby does not consider it as a possible statement
|
15600
|
+
// that could emit a line event.
|
15601
|
+
if (statements != NULL && statements->body.size == 1) {
|
15602
|
+
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
|
15603
|
+
}
|
15604
|
+
|
15323
15605
|
return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
|
15324
15606
|
}
|
15325
15607
|
|
@@ -15330,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15330
15612
|
// "aaa #{bbb} #@ccc ddd"
|
15331
15613
|
// ^^^^^
|
15332
15614
|
case PM_TOKEN_EMBVAR: {
|
15615
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15616
|
+
// even though it is known at parse time.
|
15617
|
+
parser->explicit_encoding = NULL;
|
15618
|
+
|
15333
15619
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
15334
15620
|
parser_lex(parser);
|
15335
15621
|
|
@@ -15644,75 +15930,44 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15644
15930
|
}
|
15645
15931
|
}
|
15646
15932
|
|
15647
|
-
/**
|
15648
|
-
* Return true if any of the visible scopes to the current context are using
|
15649
|
-
* numbered parameters.
|
15650
|
-
*/
|
15651
|
-
static bool
|
15652
|
-
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
15653
|
-
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
15654
|
-
if (scope->numbered_parameters > 0) return true;
|
15655
|
-
}
|
15656
|
-
|
15657
|
-
return false;
|
15658
|
-
}
|
15659
|
-
|
15660
|
-
/**
|
15661
|
-
* These are the names of the various numbered parameters. We have them here so
|
15662
|
-
* that when we insert them into the constant pool we can use a constant string
|
15663
|
-
* and not have to allocate.
|
15664
|
-
*/
|
15665
|
-
static const char * const pm_numbered_parameter_names[] = {
|
15666
|
-
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
15667
|
-
};
|
15668
|
-
|
15669
15933
|
/**
|
15670
15934
|
* Parse an identifier into either a local variable read. If the local variable
|
15671
15935
|
* is not found, it returns NULL instead.
|
15672
15936
|
*/
|
15673
|
-
static
|
15937
|
+
static pm_node_t *
|
15674
15938
|
parse_variable(pm_parser_t *parser) {
|
15939
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
15675
15940
|
int depth;
|
15676
|
-
|
15677
|
-
|
15941
|
+
|
15942
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
15943
|
+
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
15678
15944
|
}
|
15679
15945
|
|
15680
15946
|
pm_scope_t *current_scope = parser->current_scope;
|
15681
|
-
if (!current_scope->closed && current_scope->
|
15682
|
-
|
15683
|
-
|
15684
|
-
|
15685
|
-
|
15686
|
-
|
15687
|
-
|
15688
|
-
|
15689
|
-
|
15690
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
15691
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
15692
|
-
} else {
|
15693
|
-
// Indicate that this scope is using numbered params so that child
|
15694
|
-
// scopes cannot. We subtract the value for the character '0' to get
|
15695
|
-
// the actual integer value of the number (only _1 through _9 are
|
15696
|
-
// valid).
|
15697
|
-
int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
|
15698
|
-
current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
|
15699
|
-
|
15700
|
-
if (numbered_parameters > current_scope->numbered_parameters) {
|
15701
|
-
current_scope->numbered_parameters = numbered_parameters;
|
15947
|
+
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
15948
|
+
if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
15949
|
+
// When you use a numbered parameter, it implies the existence of
|
15950
|
+
// all of the locals that exist before it. For example, referencing
|
15951
|
+
// _2 means that _1 must exist. Therefore here we loop through all
|
15952
|
+
// of the possibilities and add them into the constant pool.
|
15953
|
+
uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
|
15954
|
+
for (uint8_t number = 1; number <= maximum; number++) {
|
15955
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
|
15702
15956
|
}
|
15703
15957
|
|
15704
|
-
|
15705
|
-
|
15706
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
15707
|
-
// loop through all of the possibilities and add them into the
|
15708
|
-
// constant pool.
|
15709
|
-
for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
|
15710
|
-
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
|
15958
|
+
if (!match1(parser, PM_TOKEN_EQUAL)) {
|
15959
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
|
15711
15960
|
}
|
15712
15961
|
|
15713
|
-
|
15714
|
-
|
15715
|
-
|
15962
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
|
15963
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15964
|
+
|
15965
|
+
return node;
|
15966
|
+
} else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
15967
|
+
pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
|
15968
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15969
|
+
|
15970
|
+
return node;
|
15716
15971
|
}
|
15717
15972
|
}
|
15718
15973
|
|
@@ -15727,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
15727
15982
|
pm_node_flags_t flags = 0;
|
15728
15983
|
|
15729
15984
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
15730
|
-
|
15731
|
-
if (node != NULL) return
|
15985
|
+
pm_node_t *node = parse_variable(parser);
|
15986
|
+
if (node != NULL) return node;
|
15732
15987
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
15733
15988
|
}
|
15734
15989
|
|
@@ -15846,127 +16101,355 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15846
16101
|
nodes->size = write_index;
|
15847
16102
|
}
|
15848
16103
|
|
15849
|
-
static pm_node_t *
|
15850
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
|
15851
|
-
|
15852
16104
|
/**
|
15853
|
-
*
|
15854
|
-
* expression. If it is duplicated from a previous local, then we'll need to add
|
15855
|
-
* an error to the parser.
|
16105
|
+
* Return a string content token at a particular location that is empty.
|
15856
16106
|
*/
|
15857
|
-
static
|
15858
|
-
|
15859
|
-
|
15860
|
-
if (*location->start == '_') return;
|
15861
|
-
|
15862
|
-
if (pm_constant_id_list_includes(captures, capture)) {
|
15863
|
-
pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
|
15864
|
-
} else {
|
15865
|
-
pm_constant_id_list_append(captures, capture);
|
15866
|
-
}
|
16107
|
+
static pm_token_t
|
16108
|
+
parse_strings_empty_content(const uint8_t *location) {
|
16109
|
+
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
15867
16110
|
}
|
15868
16111
|
|
15869
16112
|
/**
|
15870
|
-
*
|
16113
|
+
* Parse a set of strings that could be concatenated together.
|
15871
16114
|
*/
|
15872
|
-
static pm_node_t *
|
15873
|
-
|
15874
|
-
|
15875
|
-
// path nodes.
|
15876
|
-
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15877
|
-
pm_token_t delimiter = parser->previous;
|
15878
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15879
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15880
|
-
}
|
15881
|
-
|
15882
|
-
// If there is a [ or ( that follows, then this is part of a larger pattern
|
15883
|
-
// expression. We'll parse the inner pattern here, then modify the returned
|
15884
|
-
// inner pattern with our constant path attached.
|
15885
|
-
if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
|
15886
|
-
return node;
|
15887
|
-
}
|
16115
|
+
static inline pm_node_t *
|
16116
|
+
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16117
|
+
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
15888
16118
|
|
15889
|
-
|
15890
|
-
|
15891
|
-
pm_node_t *inner = NULL;
|
16119
|
+
bool concating = false;
|
16120
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
15892
16121
|
|
15893
|
-
|
15894
|
-
|
15895
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
16122
|
+
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16123
|
+
pm_node_t *node = NULL;
|
15896
16124
|
|
15897
|
-
|
15898
|
-
|
15899
|
-
|
15900
|
-
|
15901
|
-
|
16125
|
+
// Here we have found a string literal. We'll parse it and add it to
|
16126
|
+
// the list of strings.
|
16127
|
+
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16128
|
+
assert(lex_mode->mode == PM_LEX_STRING);
|
16129
|
+
bool lex_interpolation = lex_mode->as.string.interpolation;
|
15902
16130
|
|
15903
|
-
|
15904
|
-
} else {
|
16131
|
+
pm_token_t opening = parser->current;
|
15905
16132
|
parser_lex(parser);
|
15906
|
-
opening = parser->previous;
|
15907
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
15908
|
-
|
15909
|
-
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
15910
|
-
inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
15911
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
15912
|
-
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
15913
|
-
}
|
15914
|
-
|
15915
|
-
closing = parser->previous;
|
15916
|
-
}
|
15917
|
-
|
15918
|
-
if (!inner) {
|
15919
|
-
// If there was no inner pattern, then we have something like Foo() or
|
15920
|
-
// Foo[]. In that case we'll create an array pattern with no requireds.
|
15921
|
-
return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
15922
|
-
}
|
15923
16133
|
|
15924
|
-
|
15925
|
-
|
15926
|
-
|
15927
|
-
|
15928
|
-
|
15929
|
-
|
15930
|
-
|
16134
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16135
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16136
|
+
// If we get here, then we have an end immediately after a
|
16137
|
+
// start. In that case we'll create an empty content token and
|
16138
|
+
// return an uninterpolated string.
|
16139
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16140
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
15931
16141
|
|
15932
|
-
|
15933
|
-
|
15934
|
-
|
16142
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16143
|
+
node = (pm_node_t *) string;
|
16144
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16145
|
+
// If we get here, then we have an end of a label immediately
|
16146
|
+
// after a start. In that case we'll create an empty symbol
|
16147
|
+
// node.
|
16148
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16149
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
15935
16150
|
|
15936
|
-
|
15937
|
-
|
15938
|
-
|
16151
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16152
|
+
node = (pm_node_t *) symbol;
|
16153
|
+
} else if (!lex_interpolation) {
|
16154
|
+
// If we don't accept interpolation then we expect the string to
|
16155
|
+
// start with a single string content node.
|
16156
|
+
pm_string_t unescaped;
|
16157
|
+
pm_token_t content;
|
15939
16158
|
|
15940
|
-
|
16159
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
16160
|
+
unescaped = PM_STRING_EMPTY;
|
16161
|
+
content = not_provided(parser);
|
16162
|
+
} else {
|
16163
|
+
unescaped = parser->current_string;
|
16164
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16165
|
+
content = parser->previous;
|
15941
16166
|
}
|
15942
16167
|
|
15943
|
-
|
15944
|
-
|
15945
|
-
|
15946
|
-
|
15947
|
-
|
15948
|
-
|
15949
|
-
|
15950
|
-
|
15951
|
-
|
15952
|
-
|
15953
|
-
|
15954
|
-
|
16168
|
+
// It is unfortunately possible to have multiple string content
|
16169
|
+
// nodes in a row in the case that there's heredoc content in
|
16170
|
+
// the middle of the string, like this cursed example:
|
16171
|
+
//
|
16172
|
+
// <<-END+'b
|
16173
|
+
// a
|
16174
|
+
// END
|
16175
|
+
// c'+'d'
|
16176
|
+
//
|
16177
|
+
// In that case we need to switch to an interpolated string to
|
16178
|
+
// be able to contain all of the parts.
|
16179
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16180
|
+
pm_node_list_t parts = { 0 };
|
15955
16181
|
|
15956
|
-
|
15957
|
-
|
16182
|
+
pm_token_t delimiters = not_provided(parser);
|
16183
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16184
|
+
pm_node_list_append(&parts, part);
|
15958
16185
|
|
15959
|
-
|
15960
|
-
|
15961
|
-
|
15962
|
-
|
16186
|
+
do {
|
16187
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16188
|
+
pm_node_list_append(&parts, part);
|
16189
|
+
parser_lex(parser);
|
16190
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
15963
16191
|
|
15964
|
-
|
15965
|
-
|
15966
|
-
pattern_node->base.location.end = closing.end;
|
16192
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16193
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
15967
16194
|
|
15968
|
-
|
15969
|
-
|
16195
|
+
pm_node_list_free(&parts);
|
16196
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16197
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16198
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16199
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16200
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16201
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16202
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16203
|
+
} else {
|
16204
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16205
|
+
parser->previous.start = parser->previous.end;
|
16206
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16207
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16208
|
+
}
|
16209
|
+
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16210
|
+
// In this case we've hit string content so we know the string
|
16211
|
+
// at least has something in it. We'll need to check if the
|
16212
|
+
// following token is the end (in which case we can return a
|
16213
|
+
// plain string) or if it's not then it has interpolation.
|
16214
|
+
pm_token_t content = parser->current;
|
16215
|
+
pm_string_t unescaped = parser->current_string;
|
16216
|
+
parser_lex(parser);
|
16217
|
+
|
16218
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16219
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16220
|
+
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16221
|
+
|
16222
|
+
// Kind of odd behavior, but basically if we have an
|
16223
|
+
// unterminated string and it ends in a newline, we back up one
|
16224
|
+
// character so that the error message is on the last line of
|
16225
|
+
// content in the string.
|
16226
|
+
if (!accept1(parser, PM_TOKEN_STRING_END)) {
|
16227
|
+
const uint8_t *location = parser->previous.end;
|
16228
|
+
if (location > parser->start && location[-1] == '\n') location--;
|
16229
|
+
pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
|
16230
|
+
|
16231
|
+
parser->previous.start = parser->previous.end;
|
16232
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16233
|
+
}
|
16234
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16235
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16236
|
+
} else {
|
16237
|
+
// If we get here, then we have interpolation so we'll need
|
16238
|
+
// to create a string or symbol node with interpolation.
|
16239
|
+
pm_node_list_t parts = { 0 };
|
16240
|
+
pm_token_t string_opening = not_provided(parser);
|
16241
|
+
pm_token_t string_closing = not_provided(parser);
|
16242
|
+
|
16243
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16244
|
+
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16245
|
+
pm_node_list_append(&parts, part);
|
16246
|
+
|
16247
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16248
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16249
|
+
pm_node_list_append(&parts, part);
|
16250
|
+
}
|
16251
|
+
}
|
16252
|
+
|
16253
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16254
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16255
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16256
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16257
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16258
|
+
} else {
|
16259
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16260
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16261
|
+
}
|
16262
|
+
|
16263
|
+
pm_node_list_free(&parts);
|
16264
|
+
}
|
16265
|
+
} else {
|
16266
|
+
// If we get here, then the first part of the string is not plain
|
16267
|
+
// string content, in which case we need to parse the string as an
|
16268
|
+
// interpolated string.
|
16269
|
+
pm_node_list_t parts = { 0 };
|
16270
|
+
pm_node_t *part;
|
16271
|
+
|
16272
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16273
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16274
|
+
pm_node_list_append(&parts, part);
|
16275
|
+
}
|
16276
|
+
}
|
16277
|
+
|
16278
|
+
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16279
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16280
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16281
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16282
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16283
|
+
} else {
|
16284
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16285
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16286
|
+
}
|
16287
|
+
|
16288
|
+
pm_node_list_free(&parts);
|
16289
|
+
}
|
16290
|
+
|
16291
|
+
if (current == NULL) {
|
16292
|
+
// If the node we just parsed is a symbol node, then we can't
|
16293
|
+
// concatenate it with anything else, so we can now return that
|
16294
|
+
// node.
|
16295
|
+
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16296
|
+
return node;
|
16297
|
+
}
|
16298
|
+
|
16299
|
+
// If we don't already have a node, then it's fine and we can just
|
16300
|
+
// set the result to be the node we just parsed.
|
16301
|
+
current = node;
|
16302
|
+
} else {
|
16303
|
+
// Otherwise we need to check the type of the node we just parsed.
|
16304
|
+
// If it cannot be concatenated with the previous node, then we'll
|
16305
|
+
// need to add a syntax error.
|
16306
|
+
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16307
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16308
|
+
}
|
16309
|
+
|
16310
|
+
// If we haven't already created our container for concatenation,
|
16311
|
+
// we'll do that now.
|
16312
|
+
if (!concating) {
|
16313
|
+
concating = true;
|
16314
|
+
pm_token_t bounds = not_provided(parser);
|
16315
|
+
|
16316
|
+
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16317
|
+
pm_interpolated_string_node_append(container, current);
|
16318
|
+
current = (pm_node_t *) container;
|
16319
|
+
}
|
16320
|
+
|
16321
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16322
|
+
}
|
16323
|
+
}
|
16324
|
+
|
16325
|
+
return current;
|
16326
|
+
}
|
16327
|
+
|
16328
|
+
#define PM_PARSE_PATTERN_SINGLE 0
|
16329
|
+
#define PM_PARSE_PATTERN_TOP 1
|
16330
|
+
#define PM_PARSE_PATTERN_MULTI 2
|
16331
|
+
|
16332
|
+
static pm_node_t *
|
16333
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
|
16334
|
+
|
16335
|
+
/**
|
16336
|
+
* Add the newly created local to the list of captures for this pattern matching
|
16337
|
+
* expression. If it is duplicated from a previous local, then we'll need to add
|
16338
|
+
* an error to the parser.
|
16339
|
+
*/
|
16340
|
+
static void
|
16341
|
+
parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
|
16342
|
+
// Skip this capture if it starts with an underscore.
|
16343
|
+
if (*location->start == '_') return;
|
16344
|
+
|
16345
|
+
if (pm_constant_id_list_includes(captures, capture)) {
|
16346
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
|
16347
|
+
} else {
|
16348
|
+
pm_constant_id_list_append(captures, capture);
|
16349
|
+
}
|
16350
|
+
}
|
16351
|
+
|
16352
|
+
/**
|
16353
|
+
* Accept any number of constants joined by :: delimiters.
|
16354
|
+
*/
|
16355
|
+
static pm_node_t *
|
16356
|
+
parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
|
16357
|
+
// Now, if there are any :: operators that follow, parse them as constant
|
16358
|
+
// path nodes.
|
16359
|
+
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
16360
|
+
pm_token_t delimiter = parser->previous;
|
16361
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16362
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
16363
|
+
}
|
16364
|
+
|
16365
|
+
// If there is a [ or ( that follows, then this is part of a larger pattern
|
16366
|
+
// expression. We'll parse the inner pattern here, then modify the returned
|
16367
|
+
// inner pattern with our constant path attached.
|
16368
|
+
if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
|
16369
|
+
return node;
|
16370
|
+
}
|
16371
|
+
|
16372
|
+
pm_token_t opening;
|
16373
|
+
pm_token_t closing;
|
16374
|
+
pm_node_t *inner = NULL;
|
16375
|
+
|
16376
|
+
if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
|
16377
|
+
opening = parser->previous;
|
16378
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16379
|
+
|
16380
|
+
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16381
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
16382
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16383
|
+
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
16384
|
+
}
|
16385
|
+
|
16386
|
+
closing = parser->previous;
|
16387
|
+
} else {
|
16388
|
+
parser_lex(parser);
|
16389
|
+
opening = parser->previous;
|
16390
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16391
|
+
|
16392
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16393
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16394
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16395
|
+
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16396
|
+
}
|
16397
|
+
|
16398
|
+
closing = parser->previous;
|
16399
|
+
}
|
16400
|
+
|
16401
|
+
if (!inner) {
|
16402
|
+
// If there was no inner pattern, then we have something like Foo() or
|
16403
|
+
// Foo[]. In that case we'll create an array pattern with no requireds.
|
16404
|
+
return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
16405
|
+
}
|
16406
|
+
|
16407
|
+
// Now that we have the inner pattern, check to see if it's an array, find,
|
16408
|
+
// or hash pattern. If it is, then we'll attach our constant path to it if
|
16409
|
+
// it doesn't already have a constant. If it's not one of those node types
|
16410
|
+
// or it does have a constant, then we'll create an array pattern.
|
16411
|
+
switch (PM_NODE_TYPE(inner)) {
|
16412
|
+
case PM_ARRAY_PATTERN_NODE: {
|
16413
|
+
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
|
16414
|
+
|
16415
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16416
|
+
pattern_node->base.location.start = node->location.start;
|
16417
|
+
pattern_node->base.location.end = closing.end;
|
16418
|
+
|
16419
|
+
pattern_node->constant = node;
|
16420
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
16421
|
+
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
16422
|
+
|
16423
|
+
return (pm_node_t *) pattern_node;
|
16424
|
+
}
|
16425
|
+
|
16426
|
+
break;
|
16427
|
+
}
|
16428
|
+
case PM_FIND_PATTERN_NODE: {
|
16429
|
+
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
|
16430
|
+
|
16431
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16432
|
+
pattern_node->base.location.start = node->location.start;
|
16433
|
+
pattern_node->base.location.end = closing.end;
|
16434
|
+
|
16435
|
+
pattern_node->constant = node;
|
16436
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
16437
|
+
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
16438
|
+
|
16439
|
+
return (pm_node_t *) pattern_node;
|
16440
|
+
}
|
16441
|
+
|
16442
|
+
break;
|
16443
|
+
}
|
16444
|
+
case PM_HASH_PATTERN_NODE: {
|
16445
|
+
pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
|
16446
|
+
|
16447
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16448
|
+
pattern_node->base.location.start = node->location.start;
|
16449
|
+
pattern_node->base.location.end = closing.end;
|
16450
|
+
|
16451
|
+
pattern_node->constant = node;
|
16452
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
15970
16453
|
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
15971
16454
|
|
15972
16455
|
return (pm_node_t *) pattern_node;
|
@@ -16055,6 +16538,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
16055
16538
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
16056
16539
|
}
|
16057
16540
|
|
16541
|
+
/**
|
16542
|
+
* Check that the slice of the source given by the bounds parameters constitutes
|
16543
|
+
* a valid local variable name.
|
16544
|
+
*/
|
16545
|
+
static bool
|
16546
|
+
pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
16547
|
+
ptrdiff_t length = end - start;
|
16548
|
+
if (length == 0) return false;
|
16549
|
+
|
16550
|
+
// First ensure that it starts with a valid identifier starting character.
|
16551
|
+
size_t width = char_is_identifier_start(parser, start);
|
16552
|
+
if (width == 0) return false;
|
16553
|
+
|
16554
|
+
// Next, ensure that it's not an uppercase character.
|
16555
|
+
if (parser->encoding_changed) {
|
16556
|
+
if (parser->encoding->isupper_char(start, length)) return false;
|
16557
|
+
} else {
|
16558
|
+
if (pm_encoding_utf_8_isupper_char(start, length)) return false;
|
16559
|
+
}
|
16560
|
+
|
16561
|
+
// Next, iterate through all of the bytes of the string to ensure that they
|
16562
|
+
// are all valid identifier characters.
|
16563
|
+
const uint8_t *cursor = start + width;
|
16564
|
+
while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
|
16565
|
+
return cursor == end;
|
16566
|
+
}
|
16567
|
+
|
16058
16568
|
/**
|
16059
16569
|
* Create an implicit node for the value of a hash pattern that has omitted the
|
16060
16570
|
* value. This will use an implicit local variable target.
|
@@ -16062,14 +16572,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
16062
16572
|
static pm_node_t *
|
16063
16573
|
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
|
16064
16574
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
16065
|
-
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
16066
16575
|
|
16576
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
16067
16577
|
int depth = -1;
|
16068
|
-
|
16069
|
-
|
16070
|
-
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16071
|
-
} else {
|
16578
|
+
|
16579
|
+
if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
|
16072
16580
|
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16581
|
+
} else {
|
16582
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16583
|
+
|
16584
|
+
if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
|
16585
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16586
|
+
}
|
16073
16587
|
}
|
16074
16588
|
|
16075
16589
|
if (depth == -1) {
|
@@ -16093,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
16093
16607
|
*/
|
16094
16608
|
static void
|
16095
16609
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
16096
|
-
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
16610
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
|
16097
16611
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
16098
16612
|
}
|
16099
16613
|
}
|
@@ -16124,7 +16638,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16124
16638
|
} else {
|
16125
16639
|
// Here we have a value for the first assoc in the list, so
|
16126
16640
|
// we will parse it now.
|
16127
|
-
value = parse_pattern(parser, captures,
|
16641
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
16128
16642
|
}
|
16129
16643
|
|
16130
16644
|
pm_token_t operator = not_provided(parser);
|
@@ -16139,7 +16653,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16139
16653
|
// If we get anything else, then this is an error. For this we'll
|
16140
16654
|
// create a missing node for the value and create an assoc node for
|
16141
16655
|
// the first node in the list.
|
16142
|
-
|
16656
|
+
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
|
16657
|
+
pm_parser_err_node(parser, first_node, diag_id);
|
16143
16658
|
|
16144
16659
|
pm_token_t operator = not_provided(parser);
|
16145
16660
|
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
@@ -16167,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16167
16682
|
pm_node_list_append(&assocs, assoc);
|
16168
16683
|
}
|
16169
16684
|
} else {
|
16170
|
-
|
16171
|
-
|
16685
|
+
pm_node_t *key;
|
16686
|
+
|
16687
|
+
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16688
|
+
key = parse_strings(parser, NULL);
|
16689
|
+
|
16690
|
+
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16691
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
|
16692
|
+
} else if (!pm_symbol_node_label_p(key)) {
|
16693
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16694
|
+
}
|
16695
|
+
} else {
|
16696
|
+
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16697
|
+
key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16698
|
+
}
|
16172
16699
|
|
16173
16700
|
parse_pattern_hash_key(parser, &keys, key);
|
16174
16701
|
pm_node_t *value = NULL;
|
@@ -16176,7 +16703,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16176
16703
|
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
16177
16704
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
|
16178
16705
|
} else {
|
16179
|
-
value = parse_pattern(parser, captures,
|
16706
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
16180
16707
|
}
|
16181
16708
|
|
16182
16709
|
pm_token_t operator = not_provided(parser);
|
@@ -16233,7 +16760,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16233
16760
|
|
16234
16761
|
// Otherwise, we'll parse the inner pattern, then deal with it depending
|
16235
16762
|
// on the type it returns.
|
16236
|
-
pm_node_t *inner = parse_pattern(parser, captures,
|
16763
|
+
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
16237
16764
|
|
16238
16765
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16239
16766
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
@@ -16300,11 +16827,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16300
16827
|
first_node = parse_pattern_keyword_rest(parser, captures);
|
16301
16828
|
break;
|
16302
16829
|
case PM_TOKEN_STRING_BEGIN:
|
16303
|
-
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false,
|
16830
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
16304
16831
|
break;
|
16305
16832
|
default: {
|
16833
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
|
16306
16834
|
parser_lex(parser);
|
16307
|
-
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
16308
16835
|
|
16309
16836
|
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16310
16837
|
break;
|
@@ -16380,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16380
16907
|
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
16381
16908
|
|
16382
16909
|
if (variable == NULL) {
|
16383
|
-
|
16384
|
-
|
16385
|
-
!parser->current_scope->closed &&
|
16386
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
16387
|
-
pm_token_is_it(parser->previous.start, parser->previous.end)
|
16388
|
-
) {
|
16389
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
16390
|
-
if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
16391
|
-
variable = (pm_node_t *) read;
|
16392
|
-
} else {
|
16393
|
-
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16394
|
-
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16395
|
-
}
|
16910
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16911
|
+
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16396
16912
|
}
|
16397
16913
|
|
16398
16914
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
@@ -16506,7 +17022,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16506
17022
|
pm_token_t opening = parser->current;
|
16507
17023
|
parser_lex(parser);
|
16508
17024
|
|
16509
|
-
pm_node_t *body = parse_pattern(parser, captures,
|
17025
|
+
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16510
17026
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16511
17027
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16512
17028
|
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
@@ -16565,7 +17081,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16565
17081
|
* Parse a pattern matching expression.
|
16566
17082
|
*/
|
16567
17083
|
static pm_node_t *
|
16568
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
17084
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
|
16569
17085
|
pm_node_t *node = NULL;
|
16570
17086
|
|
16571
17087
|
bool leading_rest = false;
|
@@ -16575,14 +17091,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16575
17091
|
case PM_TOKEN_LABEL: {
|
16576
17092
|
parser_lex(parser);
|
16577
17093
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16578
|
-
|
17094
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
|
17095
|
+
|
17096
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
17097
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
17098
|
+
}
|
17099
|
+
|
17100
|
+
return node;
|
16579
17101
|
}
|
16580
17102
|
case PM_TOKEN_USTAR_STAR: {
|
16581
17103
|
node = parse_pattern_keyword_rest(parser, captures);
|
16582
|
-
|
17104
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
17105
|
+
|
17106
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
17107
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
17108
|
+
}
|
17109
|
+
|
17110
|
+
return node;
|
16583
17111
|
}
|
16584
17112
|
case PM_TOKEN_USTAR: {
|
16585
|
-
if (
|
17113
|
+
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
|
16586
17114
|
parser_lex(parser);
|
16587
17115
|
node = (pm_node_t *) parse_pattern_rest(parser, captures);
|
16588
17116
|
leading_rest = true;
|
@@ -16601,7 +17129,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16601
17129
|
return (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16602
17130
|
}
|
16603
17131
|
|
16604
|
-
if (
|
17132
|
+
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
|
16605
17133
|
// If we have a comma, then we are now parsing either an array pattern or a
|
16606
17134
|
// find pattern. We need to parse all of the patterns, put them into a big
|
16607
17135
|
// list, and then determine which type of node we have.
|
@@ -16642,262 +17170,53 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16642
17170
|
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
16643
17171
|
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
16644
17172
|
} else {
|
16645
|
-
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
16646
|
-
}
|
16647
|
-
|
16648
|
-
xfree(nodes.nodes);
|
16649
|
-
} else if (leading_rest) {
|
16650
|
-
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
16651
|
-
// array pattern, so we can go ahead and create that node.
|
16652
|
-
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
16653
|
-
}
|
16654
|
-
|
16655
|
-
return node;
|
16656
|
-
}
|
16657
|
-
|
16658
|
-
/**
|
16659
|
-
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
16660
|
-
* from its start bounds. If it's a compound node, then we will recursively
|
16661
|
-
* apply this function to its value.
|
16662
|
-
*/
|
16663
|
-
static inline void
|
16664
|
-
parse_negative_numeric(pm_node_t *node) {
|
16665
|
-
switch (PM_NODE_TYPE(node)) {
|
16666
|
-
case PM_INTEGER_NODE: {
|
16667
|
-
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
16668
|
-
cast->base.location.start--;
|
16669
|
-
cast->value.negative = true;
|
16670
|
-
break;
|
16671
|
-
}
|
16672
|
-
case PM_FLOAT_NODE: {
|
16673
|
-
pm_float_node_t *cast = (pm_float_node_t *) node;
|
16674
|
-
cast->base.location.start--;
|
16675
|
-
cast->value = -cast->value;
|
16676
|
-
break;
|
16677
|
-
}
|
16678
|
-
case PM_RATIONAL_NODE:
|
16679
|
-
node->location.start--;
|
16680
|
-
parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
|
16681
|
-
break;
|
16682
|
-
case PM_IMAGINARY_NODE:
|
16683
|
-
node->location.start--;
|
16684
|
-
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
16685
|
-
break;
|
16686
|
-
default:
|
16687
|
-
assert(false && "unreachable");
|
16688
|
-
break;
|
16689
|
-
}
|
16690
|
-
}
|
16691
|
-
|
16692
|
-
/**
|
16693
|
-
* Return a string content token at a particular location that is empty.
|
16694
|
-
*/
|
16695
|
-
static pm_token_t
|
16696
|
-
parse_strings_empty_content(const uint8_t *location) {
|
16697
|
-
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16698
|
-
}
|
16699
|
-
|
16700
|
-
/**
|
16701
|
-
* Parse a set of strings that could be concatenated together.
|
16702
|
-
*/
|
16703
|
-
static inline pm_node_t *
|
16704
|
-
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16705
|
-
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16706
|
-
|
16707
|
-
bool concating = false;
|
16708
|
-
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16709
|
-
|
16710
|
-
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16711
|
-
pm_node_t *node = NULL;
|
16712
|
-
|
16713
|
-
// Here we have found a string literal. We'll parse it and add it to
|
16714
|
-
// the list of strings.
|
16715
|
-
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16716
|
-
assert(lex_mode->mode == PM_LEX_STRING);
|
16717
|
-
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16718
|
-
|
16719
|
-
pm_token_t opening = parser->current;
|
16720
|
-
parser_lex(parser);
|
16721
|
-
|
16722
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16723
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16724
|
-
// If we get here, then we have an end immediately after a
|
16725
|
-
// start. In that case we'll create an empty content token and
|
16726
|
-
// return an uninterpolated string.
|
16727
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16728
|
-
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16729
|
-
|
16730
|
-
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16731
|
-
node = (pm_node_t *) string;
|
16732
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16733
|
-
// If we get here, then we have an end of a label immediately
|
16734
|
-
// after a start. In that case we'll create an empty symbol
|
16735
|
-
// node.
|
16736
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16737
|
-
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16738
|
-
|
16739
|
-
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16740
|
-
node = (pm_node_t *) symbol;
|
16741
|
-
} else if (!lex_interpolation) {
|
16742
|
-
// If we don't accept interpolation then we expect the string to
|
16743
|
-
// start with a single string content node.
|
16744
|
-
pm_string_t unescaped;
|
16745
|
-
pm_token_t content;
|
16746
|
-
if (match1(parser, PM_TOKEN_EOF)) {
|
16747
|
-
unescaped = PM_STRING_EMPTY;
|
16748
|
-
content = not_provided(parser);
|
16749
|
-
} else {
|
16750
|
-
unescaped = parser->current_string;
|
16751
|
-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16752
|
-
content = parser->previous;
|
16753
|
-
}
|
16754
|
-
|
16755
|
-
// It is unfortunately possible to have multiple string content
|
16756
|
-
// nodes in a row in the case that there's heredoc content in
|
16757
|
-
// the middle of the string, like this cursed example:
|
16758
|
-
//
|
16759
|
-
// <<-END+'b
|
16760
|
-
// a
|
16761
|
-
// END
|
16762
|
-
// c'+'d'
|
16763
|
-
//
|
16764
|
-
// In that case we need to switch to an interpolated string to
|
16765
|
-
// be able to contain all of the parts.
|
16766
|
-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16767
|
-
pm_node_list_t parts = { 0 };
|
16768
|
-
|
16769
|
-
pm_token_t delimiters = not_provided(parser);
|
16770
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16771
|
-
pm_node_list_append(&parts, part);
|
16772
|
-
|
16773
|
-
do {
|
16774
|
-
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16775
|
-
pm_node_list_append(&parts, part);
|
16776
|
-
parser_lex(parser);
|
16777
|
-
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16778
|
-
|
16779
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16780
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16781
|
-
|
16782
|
-
pm_node_list_free(&parts);
|
16783
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16784
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16785
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16786
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16787
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16788
|
-
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16789
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16790
|
-
} else {
|
16791
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16792
|
-
parser->previous.start = parser->previous.end;
|
16793
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
16794
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16795
|
-
}
|
16796
|
-
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16797
|
-
// In this case we've hit string content so we know the string
|
16798
|
-
// at least has something in it. We'll need to check if the
|
16799
|
-
// following token is the end (in which case we can return a
|
16800
|
-
// plain string) or if it's not then it has interpolation.
|
16801
|
-
pm_token_t content = parser->current;
|
16802
|
-
pm_string_t unescaped = parser->current_string;
|
16803
|
-
parser_lex(parser);
|
16804
|
-
|
16805
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16806
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16807
|
-
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16808
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16809
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16810
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16811
|
-
} else {
|
16812
|
-
// If we get here, then we have interpolation so we'll need
|
16813
|
-
// to create a string or symbol node with interpolation.
|
16814
|
-
pm_node_list_t parts = { 0 };
|
16815
|
-
pm_token_t string_opening = not_provided(parser);
|
16816
|
-
pm_token_t string_closing = not_provided(parser);
|
16817
|
-
|
16818
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16819
|
-
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16820
|
-
pm_node_list_append(&parts, part);
|
16821
|
-
|
16822
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16823
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16824
|
-
pm_node_list_append(&parts, part);
|
16825
|
-
}
|
16826
|
-
}
|
16827
|
-
|
16828
|
-
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16829
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16830
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16831
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16832
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16833
|
-
} else {
|
16834
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16835
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16836
|
-
}
|
16837
|
-
|
16838
|
-
pm_node_list_free(&parts);
|
16839
|
-
}
|
16840
|
-
} else {
|
16841
|
-
// If we get here, then the first part of the string is not plain
|
16842
|
-
// string content, in which case we need to parse the string as an
|
16843
|
-
// interpolated string.
|
16844
|
-
pm_node_list_t parts = { 0 };
|
16845
|
-
pm_node_t *part;
|
16846
|
-
|
16847
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16848
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16849
|
-
pm_node_list_append(&parts, part);
|
16850
|
-
}
|
16851
|
-
}
|
16852
|
-
|
16853
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16854
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16855
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16856
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16857
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16858
|
-
} else {
|
16859
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16860
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16861
|
-
}
|
16862
|
-
|
16863
|
-
pm_node_list_free(&parts);
|
16864
|
-
}
|
16865
|
-
|
16866
|
-
if (current == NULL) {
|
16867
|
-
// If the node we just parsed is a symbol node, then we can't
|
16868
|
-
// concatenate it with anything else, so we can now return that
|
16869
|
-
// node.
|
16870
|
-
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16871
|
-
return node;
|
16872
|
-
}
|
16873
|
-
|
16874
|
-
// If we don't already have a node, then it's fine and we can just
|
16875
|
-
// set the result to be the node we just parsed.
|
16876
|
-
current = node;
|
16877
|
-
} else {
|
16878
|
-
// Otherwise we need to check the type of the node we just parsed.
|
16879
|
-
// If it cannot be concatenated with the previous node, then we'll
|
16880
|
-
// need to add a syntax error.
|
16881
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16882
|
-
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16883
|
-
}
|
17173
|
+
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
17174
|
+
}
|
16884
17175
|
|
16885
|
-
|
16886
|
-
|
16887
|
-
|
16888
|
-
|
16889
|
-
|
17176
|
+
xfree(nodes.nodes);
|
17177
|
+
} else if (leading_rest) {
|
17178
|
+
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
17179
|
+
// array pattern, so we can go ahead and create that node.
|
17180
|
+
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
17181
|
+
}
|
16890
17182
|
|
16891
|
-
|
16892
|
-
|
16893
|
-
current = (pm_node_t *) container;
|
16894
|
-
}
|
17183
|
+
return node;
|
17184
|
+
}
|
16895
17185
|
|
16896
|
-
|
17186
|
+
/**
|
17187
|
+
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
17188
|
+
* from its start bounds. If it's a compound node, then we will recursively
|
17189
|
+
* apply this function to its value.
|
17190
|
+
*/
|
17191
|
+
static inline void
|
17192
|
+
parse_negative_numeric(pm_node_t *node) {
|
17193
|
+
switch (PM_NODE_TYPE(node)) {
|
17194
|
+
case PM_INTEGER_NODE: {
|
17195
|
+
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
17196
|
+
cast->base.location.start--;
|
17197
|
+
cast->value.negative = true;
|
17198
|
+
break;
|
17199
|
+
}
|
17200
|
+
case PM_FLOAT_NODE: {
|
17201
|
+
pm_float_node_t *cast = (pm_float_node_t *) node;
|
17202
|
+
cast->base.location.start--;
|
17203
|
+
cast->value = -cast->value;
|
17204
|
+
break;
|
17205
|
+
}
|
17206
|
+
case PM_RATIONAL_NODE: {
|
17207
|
+
pm_rational_node_t *cast = (pm_rational_node_t *) node;
|
17208
|
+
cast->base.location.start--;
|
17209
|
+
cast->numerator.negative = true;
|
17210
|
+
break;
|
16897
17211
|
}
|
17212
|
+
case PM_IMAGINARY_NODE:
|
17213
|
+
node->location.start--;
|
17214
|
+
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
17215
|
+
break;
|
17216
|
+
default:
|
17217
|
+
assert(false && "unreachable");
|
17218
|
+
break;
|
16898
17219
|
}
|
16899
|
-
|
16900
|
-
return current;
|
16901
17220
|
}
|
16902
17221
|
|
16903
17222
|
/**
|
@@ -16912,6 +17231,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
16912
17231
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
16913
17232
|
break;
|
16914
17233
|
}
|
17234
|
+
case PM_ERR_HASH_VALUE:
|
17235
|
+
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
|
17236
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
|
17237
|
+
break;
|
17238
|
+
}
|
16915
17239
|
case PM_ERR_UNARY_RECEIVER: {
|
16916
17240
|
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
16917
17241
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
@@ -17090,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17090
17414
|
}
|
17091
17415
|
}
|
17092
17416
|
|
17417
|
+
/**
|
17418
|
+
* This struct is used to pass information between the regular expression parser
|
17419
|
+
* and the error callback.
|
17420
|
+
*/
|
17421
|
+
typedef struct {
|
17422
|
+
/** The parser that we are parsing the regular expression for. */
|
17423
|
+
pm_parser_t *parser;
|
17424
|
+
|
17425
|
+
/** The start of the regular expression. */
|
17426
|
+
const uint8_t *start;
|
17427
|
+
|
17428
|
+
/** The end of the regular expression. */
|
17429
|
+
const uint8_t *end;
|
17430
|
+
|
17431
|
+
/**
|
17432
|
+
* Whether or not the source of the regular expression is shared. This
|
17433
|
+
* impacts the location of error messages, because if it is shared then we
|
17434
|
+
* can use the location directly and if it is not, then we use the bounds of
|
17435
|
+
* the regular expression itself.
|
17436
|
+
*/
|
17437
|
+
bool shared;
|
17438
|
+
} parse_regular_expression_error_data_t;
|
17439
|
+
|
17440
|
+
/**
|
17441
|
+
* This callback is called when the regular expression parser encounters a
|
17442
|
+
* syntax error.
|
17443
|
+
*/
|
17444
|
+
static void
|
17445
|
+
parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
|
17446
|
+
parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
|
17447
|
+
pm_location_t location;
|
17448
|
+
|
17449
|
+
if (callback_data->shared) {
|
17450
|
+
location = (pm_location_t) { .start = start, .end = end };
|
17451
|
+
} else {
|
17452
|
+
location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
|
17453
|
+
}
|
17454
|
+
|
17455
|
+
PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
|
17456
|
+
}
|
17457
|
+
|
17458
|
+
/**
|
17459
|
+
* Parse the errors for the regular expression and add them to the parser.
|
17460
|
+
*/
|
17461
|
+
static void
|
17462
|
+
parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
|
17463
|
+
const pm_string_t *unescaped = &node->unescaped;
|
17464
|
+
parse_regular_expression_error_data_t error_data = {
|
17465
|
+
.parser = parser,
|
17466
|
+
.start = node->base.location.start,
|
17467
|
+
.end = node->base.location.end,
|
17468
|
+
.shared = unescaped->type == PM_STRING_SHARED
|
17469
|
+
};
|
17470
|
+
|
17471
|
+
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
|
17472
|
+
}
|
17473
|
+
|
17093
17474
|
/**
|
17094
17475
|
* Parse an expression that begins with the previous node that we just lexed.
|
17095
17476
|
*/
|
@@ -17110,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17110
17491
|
break;
|
17111
17492
|
}
|
17112
17493
|
|
17113
|
-
|
17114
|
-
|
17494
|
+
// Ensure that we have a comma between elements in the array.
|
17495
|
+
if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
|
17496
|
+
const uint8_t *location = parser->previous.end;
|
17497
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17498
|
+
|
17499
|
+
parser->previous.start = location;
|
17500
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
17115
17501
|
}
|
17116
17502
|
|
17117
17503
|
// If we have a right bracket immediately following a comma,
|
@@ -17289,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17289
17675
|
|
17290
17676
|
// If we didn't find a terminator and we didn't find a right
|
17291
17677
|
// parenthesis, then this is a syntax error.
|
17292
|
-
if (!terminator_found) {
|
17678
|
+
if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
|
17293
17679
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17294
17680
|
}
|
17295
17681
|
|
@@ -17318,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17318
17704
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
|
17319
17705
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
17320
17706
|
break;
|
17321
|
-
} else {
|
17707
|
+
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
17708
|
+
// If we're at the end of the file, then we're going to add
|
17709
|
+
// an error after this for the ) anyway.
|
17322
17710
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17323
17711
|
}
|
17324
17712
|
}
|
@@ -17537,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17537
17925
|
) {
|
17538
17926
|
pm_arguments_t arguments = { 0 };
|
17539
17927
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
17540
|
-
|
17541
17928
|
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
|
17929
|
+
|
17930
|
+
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
17931
|
+
// If we're about to convert an 'it' implicit local
|
17932
|
+
// variable read into a method call, we need to remove
|
17933
|
+
// it from the list of implicit local variables.
|
17934
|
+
parse_target_implicit_parameter(parser, node);
|
17935
|
+
} else {
|
17936
|
+
// Otherwise, we're about to convert a regular local
|
17937
|
+
// variable read into a method call, in which case we
|
17938
|
+
// need to indicate that this was not a read for the
|
17939
|
+
// purposes of warnings.
|
17940
|
+
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
|
17941
|
+
|
17942
|
+
if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
|
17943
|
+
parse_target_implicit_parameter(parser, node);
|
17944
|
+
} else {
|
17945
|
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
17946
|
+
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
|
17947
|
+
}
|
17948
|
+
}
|
17949
|
+
|
17542
17950
|
pm_node_destroy(parser, node);
|
17543
17951
|
return (pm_node_t *) fcall;
|
17544
17952
|
}
|
@@ -17546,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17546
17954
|
|
17547
17955
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17548
17956
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
17549
|
-
} else {
|
17550
|
-
// Check if `it` is not going to be assigned.
|
17551
|
-
switch (parser->current.type) {
|
17552
|
-
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
17553
|
-
case PM_TOKEN_AMPERSAND_EQUAL:
|
17554
|
-
case PM_TOKEN_CARET_EQUAL:
|
17555
|
-
case PM_TOKEN_EQUAL:
|
17556
|
-
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
17557
|
-
case PM_TOKEN_LESS_LESS_EQUAL:
|
17558
|
-
case PM_TOKEN_MINUS_EQUAL:
|
17559
|
-
case PM_TOKEN_PARENTHESIS_RIGHT:
|
17560
|
-
case PM_TOKEN_PERCENT_EQUAL:
|
17561
|
-
case PM_TOKEN_PIPE_EQUAL:
|
17562
|
-
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
17563
|
-
case PM_TOKEN_PLUS_EQUAL:
|
17564
|
-
case PM_TOKEN_SLASH_EQUAL:
|
17565
|
-
case PM_TOKEN_STAR_EQUAL:
|
17566
|
-
case PM_TOKEN_STAR_STAR_EQUAL:
|
17567
|
-
break;
|
17568
|
-
default:
|
17569
|
-
// Once we know it's neither a method call nor an
|
17570
|
-
// assignment, we can finally create `it` default
|
17571
|
-
// parameter.
|
17572
|
-
node = pm_node_check_it(parser, node);
|
17573
|
-
}
|
17574
17957
|
}
|
17575
17958
|
|
17576
17959
|
return node;
|
@@ -17831,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17831
18214
|
// as frozen because when clause strings are frozen.
|
17832
18215
|
if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
|
17833
18216
|
pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
|
18217
|
+
} else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
|
18218
|
+
pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
|
17834
18219
|
}
|
17835
18220
|
|
17836
18221
|
pm_when_clause_static_literals_add(parser, &literals, condition);
|
@@ -17887,7 +18272,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17887
18272
|
pm_token_t in_keyword = parser->previous;
|
17888
18273
|
|
17889
18274
|
pm_constant_id_list_t captures = { 0 };
|
17890
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
18275
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
17891
18276
|
|
17892
18277
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
17893
18278
|
pm_constant_id_list_free(&captures);
|
@@ -17916,7 +18301,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17916
18301
|
then_keyword = not_provided(parser);
|
17917
18302
|
}
|
17918
18303
|
} else {
|
17919
|
-
expect1(parser, PM_TOKEN_KEYWORD_THEN,
|
18304
|
+
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
|
17920
18305
|
then_keyword = parser->previous;
|
17921
18306
|
}
|
17922
18307
|
|
@@ -18236,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18236
18621
|
|
18237
18622
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
18238
18623
|
receiver = parse_variable_call(parser);
|
18239
|
-
receiver = pm_node_check_it(parser, receiver);
|
18240
18624
|
|
18241
18625
|
pm_parser_scope_push(parser, true);
|
18242
18626
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
@@ -18370,7 +18754,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18370
18754
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
18371
18755
|
parser->command_start = true;
|
18372
18756
|
|
18373
|
-
|
18757
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18758
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
|
18759
|
+
parser->previous.start = parser->previous.end;
|
18760
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18761
|
+
}
|
18762
|
+
|
18374
18763
|
rparen = parser->previous;
|
18375
18764
|
break;
|
18376
18765
|
}
|
@@ -18568,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18568
18957
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18569
18958
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18570
18959
|
} else {
|
18571
|
-
index = parse_target(parser, index, false);
|
18960
|
+
index = parse_target(parser, index, false, false);
|
18572
18961
|
}
|
18573
18962
|
|
18574
18963
|
context_pop(parser);
|
@@ -19203,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19203
19592
|
bool ascii_only = parser->current_regular_expression_ascii_only;
|
19204
19593
|
parser_lex(parser);
|
19205
19594
|
|
19206
|
-
// If we hit an end, then we can create a regular expression
|
19207
|
-
// without interpolation, which can be represented more
|
19208
|
-
// more easily compiled.
|
19595
|
+
// If we hit an end, then we can create a regular expression
|
19596
|
+
// node without interpolation, which can be represented more
|
19597
|
+
// succinctly and more easily compiled.
|
19209
19598
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
19210
|
-
|
19211
|
-
|
19212
|
-
|
19599
|
+
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
19600
|
+
|
19601
|
+
// If we're not immediately followed by a =~, then we want
|
19602
|
+
// to parse all of the errors at this point. If it is
|
19603
|
+
// followed by a =~, then it will get parsed higher up while
|
19604
|
+
// parsing the named captures as well.
|
19605
|
+
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
|
19606
|
+
parse_regular_expression_errors(parser, node);
|
19607
|
+
}
|
19608
|
+
|
19609
|
+
pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
|
19610
|
+
return (pm_node_t *) node;
|
19213
19611
|
}
|
19214
19612
|
|
19215
19613
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -19219,6 +19617,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19219
19617
|
pm_token_t opening = not_provided(parser);
|
19220
19618
|
pm_token_t closing = not_provided(parser);
|
19221
19619
|
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
19620
|
+
|
19621
|
+
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
19622
|
+
// This is extremely strange, but the first string part of a
|
19623
|
+
// regular expression will always be tagged as binary if we
|
19624
|
+
// are in a US-ASCII file, no matter its contents.
|
19625
|
+
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
|
19626
|
+
}
|
19627
|
+
|
19222
19628
|
pm_interpolated_regular_expression_node_append(interpolated, part);
|
19223
19629
|
} else {
|
19224
19630
|
// If the first part of the body of the regular expression is not a
|
@@ -19419,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19419
19825
|
|
19420
19826
|
switch (parser->current.type) {
|
19421
19827
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
19422
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19423
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19424
|
-
|
19425
19828
|
pm_token_t opening = parser->current;
|
19426
19829
|
parser_lex(parser);
|
19427
19830
|
|
@@ -19438,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19438
19841
|
break;
|
19439
19842
|
}
|
19440
19843
|
case PM_CASE_PARAMETER: {
|
19441
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19442
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19443
|
-
|
19444
19844
|
pm_accepts_block_stack_push(parser, false);
|
19445
19845
|
pm_token_t opening = not_provided(parser);
|
19446
19846
|
block_parameters = parse_block_parameters(parser, false, &opening, true);
|
@@ -19693,122 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19693
20093
|
}
|
19694
20094
|
|
19695
20095
|
/**
|
19696
|
-
*
|
19697
|
-
*
|
20096
|
+
* This struct is used to pass information between the regular expression parser
|
20097
|
+
* and the named capture callback.
|
19698
20098
|
*/
|
19699
|
-
|
19700
|
-
|
19701
|
-
|
19702
|
-
return false;
|
19703
|
-
}
|
20099
|
+
typedef struct {
|
20100
|
+
/** The parser that is parsing the regular expression. */
|
20101
|
+
pm_parser_t *parser;
|
19704
20102
|
|
19705
|
-
|
19706
|
-
|
19707
|
-
if (!width) {
|
19708
|
-
return false;
|
19709
|
-
}
|
20103
|
+
/** The call node wrapping the regular expression node. */
|
20104
|
+
pm_call_node_t *call;
|
19710
20105
|
|
19711
|
-
|
19712
|
-
|
19713
|
-
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
19714
|
-
} else {
|
19715
|
-
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
19716
|
-
}
|
20106
|
+
/** The match write node that is being created. */
|
20107
|
+
pm_match_write_node_t *match;
|
19717
20108
|
|
19718
|
-
|
19719
|
-
|
19720
|
-
const uint8_t *cursor = source + width;
|
19721
|
-
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
19722
|
-
cursor += width;
|
19723
|
-
}
|
20109
|
+
/** The list of names that have been parsed. */
|
20110
|
+
pm_constant_id_list_t names;
|
19724
20111
|
|
19725
|
-
|
19726
|
-
|
20112
|
+
/**
|
20113
|
+
* Whether the content of the regular expression is shared. This impacts
|
20114
|
+
* whether or not we used owned constants or shared constants in the
|
20115
|
+
* constant pool for the names of the captures.
|
20116
|
+
*/
|
20117
|
+
bool shared;
|
20118
|
+
} parse_regular_expression_named_capture_data_t;
|
19727
20119
|
|
19728
20120
|
/**
|
19729
|
-
*
|
19730
|
-
*
|
20121
|
+
* This callback is called when the regular expression parser encounters a named
|
20122
|
+
* capture group.
|
19731
20123
|
*/
|
19732
|
-
static
|
19733
|
-
|
19734
|
-
|
19735
|
-
pm_node_t *result;
|
20124
|
+
static void
|
20125
|
+
parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
20126
|
+
parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
|
19736
20127
|
|
19737
|
-
|
19738
|
-
|
19739
|
-
|
19740
|
-
pm_match_write_node_t *match = NULL;
|
19741
|
-
pm_constant_id_list_t names = { 0 };
|
20128
|
+
pm_parser_t *parser = callback_data->parser;
|
20129
|
+
pm_call_node_t *call = callback_data->call;
|
20130
|
+
pm_constant_id_list_t *names = &callback_data->names;
|
19742
20131
|
|
19743
|
-
|
19744
|
-
|
20132
|
+
const uint8_t *source = pm_string_source(capture);
|
20133
|
+
size_t length = pm_string_length(capture);
|
19745
20134
|
|
19746
|
-
|
19747
|
-
|
20135
|
+
pm_location_t location;
|
20136
|
+
pm_constant_id_t name;
|
19748
20137
|
|
19749
|
-
|
19750
|
-
|
20138
|
+
// If the name of the capture group isn't a valid identifier, we do
|
20139
|
+
// not add it to the local table.
|
20140
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) return;
|
19751
20141
|
|
19752
|
-
|
19753
|
-
|
19754
|
-
|
20142
|
+
if (callback_data->shared) {
|
20143
|
+
// If the unescaped string is a slice of the source, then we can
|
20144
|
+
// copy the names directly. The pointers will line up.
|
20145
|
+
location = (pm_location_t) { .start = source, .end = source + length };
|
20146
|
+
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
20147
|
+
} else {
|
20148
|
+
// Otherwise, the name is a slice of the malloc-ed owned string,
|
20149
|
+
// in which case we need to copy it out into a new string.
|
20150
|
+
location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
|
19755
20151
|
|
19756
|
-
|
19757
|
-
|
19758
|
-
// copy the names directly. The pointers will line up.
|
19759
|
-
location = (pm_location_t) { .start = source, .end = source + length };
|
19760
|
-
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
19761
|
-
} else {
|
19762
|
-
// Otherwise, the name is a slice of the malloc-ed owned string,
|
19763
|
-
// in which case we need to copy it out into a new string.
|
19764
|
-
location = call->receiver->location;
|
20152
|
+
void *memory = xmalloc(length);
|
20153
|
+
if (memory == NULL) abort();
|
19765
20154
|
|
19766
|
-
|
19767
|
-
|
20155
|
+
memcpy(memory, source, length);
|
20156
|
+
name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
|
20157
|
+
}
|
19768
20158
|
|
19769
|
-
|
19770
|
-
|
19771
|
-
|
20159
|
+
// Add this name to the list of constants if it is valid, not duplicated,
|
20160
|
+
// and not a keyword.
|
20161
|
+
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
|
20162
|
+
pm_constant_id_list_append(names, name);
|
19772
20163
|
|
19773
|
-
|
19774
|
-
|
19775
|
-
|
19776
|
-
|
19777
|
-
|
20164
|
+
int depth;
|
20165
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20166
|
+
// If the local is not already a local but it is a keyword, then we
|
20167
|
+
// do not want to add a capture for this.
|
20168
|
+
if (pm_local_is_keyword((const char *) source, length)) return;
|
19778
20169
|
|
19779
|
-
|
19780
|
-
|
19781
|
-
|
19782
|
-
|
19783
|
-
if (pm_local_is_keyword((const char *) source, length)) continue;
|
20170
|
+
// If the identifier is not already a local, then we will add it to
|
20171
|
+
// the local table.
|
20172
|
+
pm_parser_local_add(parser, name, location.start, location.end, 0);
|
20173
|
+
}
|
19784
20174
|
|
19785
|
-
|
19786
|
-
|
20175
|
+
// Here we lazily create the MatchWriteNode since we know we're
|
20176
|
+
// about to add a target.
|
20177
|
+
if (callback_data->match == NULL) {
|
20178
|
+
callback_data->match = pm_match_write_node_create(parser, call);
|
20179
|
+
}
|
19787
20180
|
|
19788
|
-
|
19789
|
-
|
19790
|
-
|
20181
|
+
// Next, create the local variable target and add it to the list of
|
20182
|
+
// targets for the match.
|
20183
|
+
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20184
|
+
pm_node_list_append(&callback_data->match->targets, target);
|
20185
|
+
}
|
20186
|
+
}
|
19791
20187
|
|
19792
|
-
|
19793
|
-
|
19794
|
-
|
19795
|
-
|
19796
|
-
|
19797
|
-
|
20188
|
+
/**
|
20189
|
+
* Potentially change a =~ with a regular expression with named captures into a
|
20190
|
+
* match write node.
|
20191
|
+
*/
|
20192
|
+
static pm_node_t *
|
20193
|
+
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
20194
|
+
parse_regular_expression_named_capture_data_t callback_data = {
|
20195
|
+
.parser = parser,
|
20196
|
+
.call = call,
|
20197
|
+
.names = { 0 },
|
20198
|
+
.shared = content->type == PM_STRING_SHARED
|
20199
|
+
};
|
19798
20200
|
|
19799
|
-
|
19800
|
-
|
19801
|
-
|
19802
|
-
|
19803
|
-
|
20201
|
+
parse_regular_expression_error_data_t error_data = {
|
20202
|
+
.parser = parser,
|
20203
|
+
.start = call->receiver->location.start,
|
20204
|
+
.end = call->receiver->location.end,
|
20205
|
+
.shared = content->type == PM_STRING_SHARED
|
20206
|
+
};
|
20207
|
+
|
20208
|
+
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
20209
|
+
pm_constant_id_list_free(&callback_data.names);
|
19804
20210
|
|
19805
|
-
|
20211
|
+
if (callback_data.match != NULL) {
|
20212
|
+
return (pm_node_t *) callback_data.match;
|
19806
20213
|
} else {
|
19807
|
-
|
20214
|
+
return (pm_node_t *) call;
|
19808
20215
|
}
|
19809
|
-
|
19810
|
-
pm_string_list_free(&named_captures);
|
19811
|
-
return result;
|
19812
20216
|
}
|
19813
20217
|
|
19814
20218
|
static inline pm_node_t *
|
@@ -19925,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19925
20329
|
return result;
|
19926
20330
|
}
|
19927
20331
|
case PM_CALL_NODE: {
|
19928
|
-
parser_lex(parser);
|
19929
20332
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
19930
20333
|
|
19931
20334
|
// If we have a vcall (a method with no arguments and no
|
@@ -19936,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19936
20339
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
19937
20340
|
|
19938
20341
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20342
|
+
parser_lex(parser);
|
20343
|
+
|
19939
20344
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
19940
20345
|
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
19941
20346
|
|
@@ -19943,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19943
20348
|
return result;
|
19944
20349
|
}
|
19945
20350
|
|
20351
|
+
// Move past the token here so that we have already added
|
20352
|
+
// the local variable by this point.
|
20353
|
+
parser_lex(parser);
|
20354
|
+
|
19946
20355
|
// If there is no call operator and the message is "[]" then
|
19947
20356
|
// this is an aref expression, and we can transform it into
|
19948
20357
|
// an aset expression.
|
@@ -20038,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20038
20447
|
return result;
|
20039
20448
|
}
|
20040
20449
|
case PM_CALL_NODE: {
|
20041
|
-
parser_lex(parser);
|
20042
20450
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20043
20451
|
|
20044
20452
|
// If we have a vcall (a method with no arguments and no
|
@@ -20049,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20049
20457
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20050
20458
|
|
20051
20459
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20460
|
+
parser_lex(parser);
|
20461
|
+
|
20052
20462
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
20053
20463
|
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20054
20464
|
|
@@ -20056,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20056
20466
|
return result;
|
20057
20467
|
}
|
20058
20468
|
|
20469
|
+
// Move past the token here so that we have already added
|
20470
|
+
// the local variable by this point.
|
20471
|
+
parser_lex(parser);
|
20472
|
+
|
20059
20473
|
// If there is no call operator and the message is "[]" then
|
20060
20474
|
// this is an aref expression, and we can transform it into
|
20061
20475
|
// an aset expression.
|
@@ -20209,7 +20623,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20209
20623
|
// In this case we have an operator but we don't know what it's for.
|
20210
20624
|
// We need to treat it as an error. For now, we'll mark it as an error
|
20211
20625
|
// and just skip right past it.
|
20212
|
-
|
20626
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
|
20213
20627
|
return node;
|
20214
20628
|
}
|
20215
20629
|
}
|
@@ -20465,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20465
20879
|
|
20466
20880
|
if (
|
20467
20881
|
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
|
20468
|
-
(token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
|
20882
|
+
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
|
20469
20883
|
) {
|
20470
20884
|
// If we have a constant immediately following a '::' operator, then
|
20471
20885
|
// this can either be a constant path or a method call, depending on
|
@@ -20591,7 +21005,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20591
21005
|
parser_lex(parser);
|
20592
21006
|
|
20593
21007
|
pm_constant_id_list_t captures = { 0 };
|
20594
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
21008
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
20595
21009
|
|
20596
21010
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20597
21011
|
pm_constant_id_list_free(&captures);
|
@@ -20608,7 +21022,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20608
21022
|
parser_lex(parser);
|
20609
21023
|
|
20610
21024
|
pm_constant_id_list_t captures = { 0 };
|
20611
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
21025
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
|
20612
21026
|
|
20613
21027
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20614
21028
|
pm_constant_id_list_free(&captures);
|
@@ -20621,6 +21035,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20621
21035
|
}
|
20622
21036
|
}
|
20623
21037
|
|
21038
|
+
#undef PM_PARSE_PATTERN_SINGLE
|
21039
|
+
#undef PM_PARSE_PATTERN_TOP
|
21040
|
+
#undef PM_PARSE_PATTERN_MULTI
|
21041
|
+
|
20624
21042
|
/**
|
20625
21043
|
* Parse an expression at the given point of the parser using the given binding
|
20626
21044
|
* power to parse subsequent chains. If this function finds a syntax error, it
|
@@ -21004,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
21004
21422
|
|
21005
21423
|
// Scopes given from the outside are not allowed to have numbered
|
21006
21424
|
// parameters.
|
21007
|
-
parser->current_scope->
|
21425
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
21008
21426
|
|
21009
21427
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
21010
21428
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -21392,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
21392
21810
|
}
|
21393
21811
|
|
21394
21812
|
#endif
|
21395
|
-
|
21396
|
-
/** An error that is going to be formatted into the output. */
|
21397
|
-
typedef struct {
|
21398
|
-
/** A pointer to the diagnostic that was generated during parsing. */
|
21399
|
-
pm_diagnostic_t *error;
|
21400
|
-
|
21401
|
-
/** The start line of the diagnostic message. */
|
21402
|
-
int32_t line;
|
21403
|
-
|
21404
|
-
/** The column start of the diagnostic message. */
|
21405
|
-
uint32_t column_start;
|
21406
|
-
|
21407
|
-
/** The column end of the diagnostic message. */
|
21408
|
-
uint32_t column_end;
|
21409
|
-
} pm_error_t;
|
21410
|
-
|
21411
|
-
/** The format that will be used to format the errors into the output. */
|
21412
|
-
typedef struct {
|
21413
|
-
/** The prefix that will be used for line numbers. */
|
21414
|
-
const char *number_prefix;
|
21415
|
-
|
21416
|
-
/** The prefix that will be used for blank lines. */
|
21417
|
-
const char *blank_prefix;
|
21418
|
-
|
21419
|
-
/** The divider that will be used between sections of source code. */
|
21420
|
-
const char *divider;
|
21421
|
-
|
21422
|
-
/** The length of the blank prefix. */
|
21423
|
-
size_t blank_prefix_length;
|
21424
|
-
|
21425
|
-
/** The length of the divider. */
|
21426
|
-
size_t divider_length;
|
21427
|
-
} pm_error_format_t;
|
21428
|
-
|
21429
|
-
#define PM_COLOR_GRAY "\033[38;5;102m"
|
21430
|
-
#define PM_COLOR_RED "\033[1;31m"
|
21431
|
-
#define PM_COLOR_RESET "\033[m"
|
21432
|
-
|
21433
|
-
static inline pm_error_t *
|
21434
|
-
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
21435
|
-
pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
|
21436
|
-
if (errors == NULL) return NULL;
|
21437
|
-
|
21438
|
-
int32_t start_line = parser->start_line;
|
21439
|
-
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
21440
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
21441
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
21442
|
-
|
21443
|
-
// We're going to insert this error into the array in sorted order. We
|
21444
|
-
// do this by finding the first error that has a line number greater
|
21445
|
-
// than the current error and then inserting the current error before
|
21446
|
-
// that one.
|
21447
|
-
size_t index = 0;
|
21448
|
-
while (
|
21449
|
-
(index < error_list->size) &&
|
21450
|
-
(errors[index].error != NULL) &&
|
21451
|
-
(
|
21452
|
-
(errors[index].line < start.line) ||
|
21453
|
-
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
21454
|
-
)
|
21455
|
-
) index++;
|
21456
|
-
|
21457
|
-
// Now we're going to shift all of the errors after this one down one
|
21458
|
-
// index to make room for the new error.
|
21459
|
-
if (index + 1 < error_list->size) {
|
21460
|
-
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
21461
|
-
}
|
21462
|
-
|
21463
|
-
// Finally, we'll insert the error into the array.
|
21464
|
-
uint32_t column_end;
|
21465
|
-
if (start.line == end.line) {
|
21466
|
-
column_end = end.column;
|
21467
|
-
} else {
|
21468
|
-
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
21469
|
-
}
|
21470
|
-
|
21471
|
-
// Ensure we have at least one column of error.
|
21472
|
-
if (start.column == column_end) column_end++;
|
21473
|
-
|
21474
|
-
errors[index] = (pm_error_t) {
|
21475
|
-
.error = error,
|
21476
|
-
.line = start.line,
|
21477
|
-
.column_start = start.column,
|
21478
|
-
.column_end = column_end
|
21479
|
-
};
|
21480
|
-
}
|
21481
|
-
|
21482
|
-
return errors;
|
21483
|
-
}
|
21484
|
-
|
21485
|
-
static inline void
|
21486
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
21487
|
-
int32_t line_delta = line - parser->start_line;
|
21488
|
-
assert(line_delta >= 0);
|
21489
|
-
|
21490
|
-
size_t index = (size_t) line_delta;
|
21491
|
-
assert(index < newline_list->size);
|
21492
|
-
|
21493
|
-
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
21494
|
-
const uint8_t *end;
|
21495
|
-
|
21496
|
-
if (index >= newline_list->size - 1) {
|
21497
|
-
end = parser->end;
|
21498
|
-
} else {
|
21499
|
-
end = &parser->start[newline_list->offsets[index + 1]];
|
21500
|
-
}
|
21501
|
-
|
21502
|
-
pm_buffer_append_format(buffer, number_prefix, line);
|
21503
|
-
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
21504
|
-
|
21505
|
-
if (end == parser->end && end[-1] != '\n') {
|
21506
|
-
pm_buffer_append_string(buffer, "\n", 1);
|
21507
|
-
}
|
21508
|
-
}
|
21509
|
-
|
21510
|
-
/**
|
21511
|
-
* Format the errors on the parser into the given buffer.
|
21512
|
-
*/
|
21513
|
-
PRISM_EXPORTED_FUNCTION void
|
21514
|
-
pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
|
21515
|
-
assert(error_list->size != 0);
|
21516
|
-
|
21517
|
-
// First, we're going to sort all of the errors by line number using an
|
21518
|
-
// insertion sort into a newly allocated array.
|
21519
|
-
const int32_t start_line = parser->start_line;
|
21520
|
-
const pm_newline_list_t *newline_list = &parser->newline_list;
|
21521
|
-
|
21522
|
-
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
21523
|
-
if (errors == NULL) return;
|
21524
|
-
|
21525
|
-
// Now we're going to determine how we're going to format line numbers and
|
21526
|
-
// blank lines based on the maximum number of digits in the line numbers
|
21527
|
-
// that are going to be displaid.
|
21528
|
-
pm_error_format_t error_format;
|
21529
|
-
int32_t first_line_number = errors[0].line;
|
21530
|
-
int32_t last_line_number = errors[error_list->size - 1].line;
|
21531
|
-
|
21532
|
-
// If we have a maximum line number that is negative, then we're going to
|
21533
|
-
// use the absolute value for comparison but multiple by 10 to additionally
|
21534
|
-
// have a column for the negative sign.
|
21535
|
-
if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
|
21536
|
-
if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
|
21537
|
-
int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
|
21538
|
-
|
21539
|
-
if (max_line_number < 10) {
|
21540
|
-
if (colorize) {
|
21541
|
-
error_format = (pm_error_format_t) {
|
21542
|
-
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
21543
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21544
|
-
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
21545
|
-
};
|
21546
|
-
} else {
|
21547
|
-
error_format = (pm_error_format_t) {
|
21548
|
-
.number_prefix = "%1" PRIi32 " | ",
|
21549
|
-
.blank_prefix = " | ",
|
21550
|
-
.divider = " ~~~~~\n"
|
21551
|
-
};
|
21552
|
-
}
|
21553
|
-
} else if (max_line_number < 100) {
|
21554
|
-
if (colorize) {
|
21555
|
-
error_format = (pm_error_format_t) {
|
21556
|
-
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
21557
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21558
|
-
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
21559
|
-
};
|
21560
|
-
} else {
|
21561
|
-
error_format = (pm_error_format_t) {
|
21562
|
-
.number_prefix = "%2" PRIi32 " | ",
|
21563
|
-
.blank_prefix = " | ",
|
21564
|
-
.divider = " ~~~~~~\n"
|
21565
|
-
};
|
21566
|
-
}
|
21567
|
-
} else if (max_line_number < 1000) {
|
21568
|
-
if (colorize) {
|
21569
|
-
error_format = (pm_error_format_t) {
|
21570
|
-
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
21571
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21572
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
21573
|
-
};
|
21574
|
-
} else {
|
21575
|
-
error_format = (pm_error_format_t) {
|
21576
|
-
.number_prefix = "%3" PRIi32 " | ",
|
21577
|
-
.blank_prefix = " | ",
|
21578
|
-
.divider = " ~~~~~~~\n"
|
21579
|
-
};
|
21580
|
-
}
|
21581
|
-
} else if (max_line_number < 10000) {
|
21582
|
-
if (colorize) {
|
21583
|
-
error_format = (pm_error_format_t) {
|
21584
|
-
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
21585
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21586
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21587
|
-
};
|
21588
|
-
} else {
|
21589
|
-
error_format = (pm_error_format_t) {
|
21590
|
-
.number_prefix = "%4" PRIi32 " | ",
|
21591
|
-
.blank_prefix = " | ",
|
21592
|
-
.divider = " ~~~~~~~~\n"
|
21593
|
-
};
|
21594
|
-
}
|
21595
|
-
} else {
|
21596
|
-
if (colorize) {
|
21597
|
-
error_format = (pm_error_format_t) {
|
21598
|
-
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
21599
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21600
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21601
|
-
};
|
21602
|
-
} else {
|
21603
|
-
error_format = (pm_error_format_t) {
|
21604
|
-
.number_prefix = "%5" PRIi32 " | ",
|
21605
|
-
.blank_prefix = " | ",
|
21606
|
-
.divider = " ~~~~~~~~\n"
|
21607
|
-
};
|
21608
|
-
}
|
21609
|
-
}
|
21610
|
-
|
21611
|
-
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
21612
|
-
error_format.divider_length = strlen(error_format.divider);
|
21613
|
-
|
21614
|
-
// Now we're going to iterate through every error in our error list and
|
21615
|
-
// display it. While we're iterating, we will display some padding lines of
|
21616
|
-
// the source before the error to give some context. We'll be careful not to
|
21617
|
-
// display the same line twice in case the errors are close enough in the
|
21618
|
-
// source.
|
21619
|
-
int32_t last_line = parser->start_line - 1;
|
21620
|
-
const pm_encoding_t *encoding = parser->encoding;
|
21621
|
-
|
21622
|
-
for (size_t index = 0; index < error_list->size; index++) {
|
21623
|
-
pm_error_t *error = &errors[index];
|
21624
|
-
|
21625
|
-
// Here we determine how many lines of padding of the source to display,
|
21626
|
-
// based on the difference from the last line that was displaid.
|
21627
|
-
if (error->line - last_line > 1) {
|
21628
|
-
if (error->line - last_line > 2) {
|
21629
|
-
if ((index != 0) && (error->line - last_line > 3)) {
|
21630
|
-
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
21631
|
-
}
|
21632
|
-
|
21633
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21634
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
21635
|
-
}
|
21636
|
-
|
21637
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21638
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
21639
|
-
}
|
21640
|
-
|
21641
|
-
// If this is the first error or we're on a new line, then we'll display
|
21642
|
-
// the line that has the error in it.
|
21643
|
-
if ((index == 0) || (error->line != last_line)) {
|
21644
|
-
if (colorize) {
|
21645
|
-
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
|
21646
|
-
} else {
|
21647
|
-
pm_buffer_append_string(buffer, "> ", 2);
|
21648
|
-
}
|
21649
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
21650
|
-
}
|
21651
|
-
|
21652
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
21653
|
-
if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
|
21654
|
-
|
21655
|
-
// Now we'll display the actual error message. We'll do this by first
|
21656
|
-
// putting the prefix to the line, then a bunch of blank spaces
|
21657
|
-
// depending on the column, then as many carets as we need to display
|
21658
|
-
// the width of the error, then the error message itself.
|
21659
|
-
//
|
21660
|
-
// Note that this doesn't take into account the width of the actual
|
21661
|
-
// character when displaid in the terminal. For some east-asian
|
21662
|
-
// languages or emoji, this means it can be thrown off pretty badly. We
|
21663
|
-
// will need to solve this eventually.
|
21664
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21665
|
-
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21666
|
-
|
21667
|
-
size_t column = 0;
|
21668
|
-
while (column < error->column_start) {
|
21669
|
-
pm_buffer_append_byte(buffer, ' ');
|
21670
|
-
|
21671
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21672
|
-
column += (char_width == 0 ? 1 : char_width);
|
21673
|
-
}
|
21674
|
-
|
21675
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21676
|
-
pm_buffer_append_byte(buffer, '^');
|
21677
|
-
|
21678
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21679
|
-
column += (char_width == 0 ? 1 : char_width);
|
21680
|
-
|
21681
|
-
while (column < error->column_end) {
|
21682
|
-
pm_buffer_append_byte(buffer, '~');
|
21683
|
-
|
21684
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21685
|
-
column += (char_width == 0 ? 1 : char_width);
|
21686
|
-
}
|
21687
|
-
|
21688
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21689
|
-
|
21690
|
-
if (inline_messages) {
|
21691
|
-
pm_buffer_append_byte(buffer, ' ');
|
21692
|
-
assert(error->error != NULL);
|
21693
|
-
|
21694
|
-
const char *message = error->error->message;
|
21695
|
-
pm_buffer_append_string(buffer, message, strlen(message));
|
21696
|
-
}
|
21697
|
-
|
21698
|
-
pm_buffer_append_byte(buffer, '\n');
|
21699
|
-
|
21700
|
-
// Here we determine how many lines of padding to display after the
|
21701
|
-
// error, depending on where the next error is in source.
|
21702
|
-
last_line = error->line;
|
21703
|
-
int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
|
21704
|
-
|
21705
|
-
if (next_line - last_line > 1) {
|
21706
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21707
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21708
|
-
}
|
21709
|
-
|
21710
|
-
if (next_line - last_line > 1) {
|
21711
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21712
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21713
|
-
}
|
21714
|
-
}
|
21715
|
-
|
21716
|
-
// Finally, we'll free the array of errors that we allocated.
|
21717
|
-
xfree(errors);
|
21718
|
-
}
|
21719
|
-
|
21720
|
-
#undef PM_COLOR_GRAY
|
21721
|
-
#undef PM_COLOR_RED
|
21722
|
-
#undef PM_COLOR_RESET
|