prism 0.28.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +1 -0
- data/config.yml +95 -26
- data/docs/fuzzing.md +1 -1
- data/docs/ripper_translation.md +22 -0
- data/ext/prism/api_node.c +70 -52
- data/ext/prism/extconf.rb +27 -23
- data/ext/prism/extension.c +107 -372
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +170 -102
- data/include/prism/diagnostic.h +18 -3
- data/include/prism/node.h +0 -21
- data/include/prism/parser.h +23 -25
- data/include/prism/regexp.h +17 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +16 -9
- data/include/prism/util/pm_string.h +0 -8
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +4 -4
- data/lib/prism/dispatcher.rb +14 -0
- data/lib/prism/dot_visitor.rb +54 -35
- data/lib/prism/dsl.rb +23 -18
- data/lib/prism/ffi.rb +25 -4
- data/lib/prism/inspect_visitor.rb +26 -24
- data/lib/prism/mutation_compiler.rb +6 -1
- data/lib/prism/node.rb +314 -389
- data/lib/prism/node_ext.rb +175 -17
- data/lib/prism/parse_result/comments.rb +1 -8
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +17 -0
- data/lib/prism/reflection.rb +11 -9
- data/lib/prism/serialize.rb +91 -68
- data/lib/prism/translation/parser/compiler.rb +288 -138
- data/lib/prism/translation/parser.rb +7 -2
- data/lib/prism/translation/ripper.rb +24 -22
- data/lib/prism/translation/ruby_parser.rb +32 -14
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +2 -4
- data/rbi/prism/node.rbi +114 -57
- data/rbi/prism/node_ext.rbi +5 -0
- data/rbi/prism/parse_result.rbi +1 -1
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +6 -0
- data/sig/prism/dsl.rbs +13 -10
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +72 -48
- data/sig/prism/node_ext.rbs +4 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +21 -0
- data/src/diagnostic.c +56 -27
- data/src/node.c +432 -1690
- data/src/prettyprint.c +97 -54
- data/src/prism.c +1286 -1196
- data/src/regexp.c +133 -68
- data/src/serialize.c +22 -17
- data/src/static_literals.c +63 -84
- data/src/token_type.c +4 -4
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +39 -11
- data/src/util/pm_string.c +0 -12
- data/src/util/pm_strpbrk.c +32 -6
- metadata +3 -5
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/src/util/pm_string_list.c +0 -28
data/src/prism.c
CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
|
|
423
423
|
* This is the equivalent of IS_lex_state in CRuby.
|
424
424
|
*/
|
425
425
|
static inline bool
|
426
|
-
lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
|
426
|
+
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
|
427
427
|
return parser->lex_state & state;
|
428
428
|
}
|
429
429
|
|
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
708
708
|
.previous = parser->current_scope,
|
709
709
|
.locals = { 0 },
|
710
710
|
.parameters = PM_SCOPE_PARAMETERS_NONE,
|
711
|
-
.
|
711
|
+
.implicit_parameters = { 0 },
|
712
712
|
.shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
|
713
713
|
.closed = closed
|
714
714
|
};
|
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
|
|
749
749
|
return scope;
|
750
750
|
}
|
751
751
|
|
752
|
-
|
753
|
-
|
752
|
+
typedef enum {
|
753
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
|
754
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
|
755
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
|
756
|
+
} pm_scope_forwarding_param_check_result_t;
|
757
|
+
|
758
|
+
static pm_scope_forwarding_param_check_result_t
|
759
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
|
754
760
|
pm_scope_t *scope = parser->current_scope;
|
755
|
-
|
761
|
+
bool conflict = false;
|
762
|
+
|
763
|
+
while (scope != NULL) {
|
756
764
|
if (scope->parameters & mask) {
|
757
|
-
if (
|
758
|
-
|
759
|
-
|
765
|
+
if (scope->closed) {
|
766
|
+
if (conflict) {
|
767
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
|
768
|
+
} else {
|
769
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
|
770
|
+
}
|
760
771
|
}
|
761
|
-
|
772
|
+
|
773
|
+
conflict = true;
|
762
774
|
}
|
775
|
+
|
763
776
|
if (scope->closed) break;
|
764
777
|
scope = scope->previous;
|
765
778
|
}
|
766
779
|
|
767
|
-
|
780
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
|
768
781
|
}
|
769
782
|
|
770
|
-
static
|
783
|
+
static void
|
771
784
|
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
|
772
|
-
pm_parser_scope_forwarding_param_check(parser,
|
785
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
|
786
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
787
|
+
// Pass.
|
788
|
+
break;
|
789
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
790
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
|
791
|
+
break;
|
792
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
793
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
|
794
|
+
break;
|
795
|
+
}
|
773
796
|
}
|
774
797
|
|
775
|
-
static
|
798
|
+
static void
|
776
799
|
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
|
777
|
-
pm_parser_scope_forwarding_param_check(parser,
|
800
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
|
801
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
802
|
+
// Pass.
|
803
|
+
break;
|
804
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
805
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
|
806
|
+
break;
|
807
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
808
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
809
|
+
break;
|
810
|
+
}
|
778
811
|
}
|
779
812
|
|
780
|
-
static
|
781
|
-
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *
|
782
|
-
pm_parser_scope_forwarding_param_check(parser,
|
813
|
+
static void
|
814
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
|
815
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
|
816
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
817
|
+
// Pass.
|
818
|
+
break;
|
819
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
820
|
+
// This shouldn't happen, because ... is not allowed in the
|
821
|
+
// declaration of blocks. If we get here, we assume we already have
|
822
|
+
// an error for this.
|
823
|
+
break;
|
824
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
825
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
826
|
+
break;
|
827
|
+
}
|
783
828
|
}
|
784
829
|
|
785
|
-
static
|
830
|
+
static void
|
786
831
|
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
|
787
|
-
pm_parser_scope_forwarding_param_check(parser,
|
832
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
|
833
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
834
|
+
// Pass.
|
835
|
+
break;
|
836
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
837
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
|
838
|
+
break;
|
839
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
840
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
|
841
|
+
break;
|
842
|
+
}
|
788
843
|
}
|
789
844
|
|
790
845
|
/**
|
@@ -1128,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
|
|
1128
1183
|
return NULL;
|
1129
1184
|
case PM_BEGIN_NODE: {
|
1130
1185
|
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
1186
|
+
|
1187
|
+
if (cast->statements == NULL && cast->ensure_clause != NULL) {
|
1188
|
+
node = (pm_node_t *) cast->ensure_clause;
|
1189
|
+
}
|
1190
|
+
else {
|
1191
|
+
if (cast->rescue_clause != NULL) {
|
1192
|
+
if (cast->rescue_clause->statements == NULL) {
|
1193
|
+
return NULL;
|
1194
|
+
}
|
1195
|
+
else if (cast->else_clause != NULL) {
|
1196
|
+
node = (pm_node_t *) cast->else_clause;
|
1197
|
+
}
|
1198
|
+
else {
|
1199
|
+
node = (pm_node_t *) cast->statements;
|
1200
|
+
}
|
1201
|
+
}
|
1202
|
+
else {
|
1203
|
+
node = (pm_node_t *) cast->statements;
|
1204
|
+
}
|
1205
|
+
}
|
1206
|
+
|
1207
|
+
break;
|
1208
|
+
}
|
1209
|
+
case PM_ENSURE_NODE: {
|
1210
|
+
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
|
1131
1211
|
node = (pm_node_t *) cast->statements;
|
1132
1212
|
break;
|
1133
1213
|
}
|
@@ -1575,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
|
|
1575
1655
|
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
|
1576
1656
|
}
|
1577
1657
|
|
1578
|
-
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
|
1658
|
+
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
|
1579
1659
|
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
|
1580
1660
|
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
|
1581
1661
|
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
|
@@ -1703,7 +1783,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1703
1783
|
* it's important that it be as fast as possible.
|
1704
1784
|
*/
|
1705
1785
|
static inline size_t
|
1706
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
1786
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1707
1787
|
if (parser->encoding_changed) {
|
1708
1788
|
size_t width;
|
1709
1789
|
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
@@ -2772,8 +2852,7 @@ static pm_call_node_t *
|
|
2772
2852
|
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
|
2773
2853
|
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
2774
2854
|
|
2775
|
-
node->base.location
|
2776
|
-
node->base.location.end = parser->start;
|
2855
|
+
node->base.location = PM_LOCATION_NULL_VALUE(parser);
|
2777
2856
|
node->arguments = arguments;
|
2778
2857
|
|
2779
2858
|
node->name = name;
|
@@ -3025,8 +3104,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3025
3104
|
.message_loc = target->message_loc,
|
3026
3105
|
.read_name = 0,
|
3027
3106
|
.write_name = target->name,
|
3028
|
-
.
|
3029
|
-
.
|
3107
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3108
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3030
3109
|
.value = value
|
3031
3110
|
};
|
3032
3111
|
|
@@ -3064,8 +3143,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3064
3143
|
.arguments = target->arguments,
|
3065
3144
|
.closing_loc = target->closing_loc,
|
3066
3145
|
.block = target->block,
|
3067
|
-
.
|
3068
|
-
.
|
3146
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3147
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3069
3148
|
.value = value
|
3070
3149
|
};
|
3071
3150
|
|
@@ -3409,9 +3488,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
|
|
3409
3488
|
},
|
3410
3489
|
.name = target->name,
|
3411
3490
|
.name_loc = target->base.location,
|
3412
|
-
.
|
3491
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3413
3492
|
.value = value,
|
3414
|
-
.
|
3493
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3415
3494
|
};
|
3416
3495
|
|
3417
3496
|
return node;
|
@@ -3525,9 +3604,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
|
|
3525
3604
|
}
|
3526
3605
|
},
|
3527
3606
|
.target = target,
|
3528
|
-
.
|
3607
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3529
3608
|
.value = value,
|
3530
|
-
.
|
3609
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3531
3610
|
};
|
3532
3611
|
|
3533
3612
|
return node;
|
@@ -3652,9 +3731,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
|
|
3652
3731
|
},
|
3653
3732
|
.name = target->name,
|
3654
3733
|
.name_loc = target->base.location,
|
3655
|
-
.
|
3734
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3656
3735
|
.value = value,
|
3657
|
-
.
|
3736
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3658
3737
|
};
|
3659
3738
|
|
3660
3739
|
return node;
|
@@ -4236,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4236
4315
|
}
|
4237
4316
|
|
4238
4317
|
/**
|
4239
|
-
* Allocate and initialize a new
|
4318
|
+
* Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
|
4240
4319
|
*/
|
4241
4320
|
static pm_rational_node_t *
|
4242
4321
|
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4246,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4246
4325
|
*node = (pm_rational_node_t) {
|
4247
4326
|
{
|
4248
4327
|
.type = PM_RATIONAL_NODE,
|
4249
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
4328
|
+
.flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
|
4250
4329
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4251
4330
|
},
|
4252
|
-
.
|
4253
|
-
|
4254
|
-
.start = token->start,
|
4255
|
-
.end = token->end - 1
|
4256
|
-
}))
|
4331
|
+
.numerator = { 0 },
|
4332
|
+
.denominator = { 0 }
|
4257
4333
|
};
|
4258
4334
|
|
4335
|
+
const uint8_t *start = token->start;
|
4336
|
+
const uint8_t *end = token->end - 1; // r
|
4337
|
+
|
4338
|
+
while (start < end && *start == '0') start++; // 0.1 -> .1
|
4339
|
+
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
|
4340
|
+
|
4341
|
+
size_t length = (size_t) (end - start);
|
4342
|
+
if (length == 1) {
|
4343
|
+
node->denominator.value = 1;
|
4344
|
+
return node;
|
4345
|
+
}
|
4346
|
+
|
4347
|
+
const uint8_t *point = memchr(start, '.', length);
|
4348
|
+
assert(point && "should have a decimal point");
|
4349
|
+
|
4350
|
+
uint8_t *digits = malloc(length);
|
4351
|
+
if (digits == NULL) {
|
4352
|
+
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
|
4353
|
+
abort();
|
4354
|
+
}
|
4355
|
+
|
4356
|
+
memcpy(digits, start, (unsigned long) (point - start));
|
4357
|
+
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
|
4358
|
+
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
|
4359
|
+
|
4360
|
+
digits[0] = '1';
|
4361
|
+
if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
|
4362
|
+
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
|
4363
|
+
free(digits);
|
4364
|
+
|
4365
|
+
pm_integers_reduce(&node->numerator, &node->denominator);
|
4259
4366
|
return node;
|
4260
4367
|
}
|
4261
4368
|
|
@@ -4505,9 +4612,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
|
|
4505
4612
|
},
|
4506
4613
|
.name = pm_global_variable_write_name(parser, target),
|
4507
4614
|
.name_loc = target->location,
|
4508
|
-
.
|
4615
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4509
4616
|
.value = value,
|
4510
|
-
.
|
4617
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4511
4618
|
};
|
4512
4619
|
|
4513
4620
|
return node;
|
@@ -4566,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
|
|
4566
4673
|
*node = (pm_global_variable_read_node_t) {
|
4567
4674
|
{
|
4568
4675
|
.type = PM_GLOBAL_VARIABLE_READ_NODE,
|
4569
|
-
.location =
|
4676
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4570
4677
|
},
|
4571
4678
|
.name = name
|
4572
4679
|
};
|
@@ -4608,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
|
|
4608
4715
|
*node = (pm_global_variable_write_node_t) {
|
4609
4716
|
{
|
4610
4717
|
.type = PM_GLOBAL_VARIABLE_WRITE_NODE,
|
4611
|
-
.location =
|
4718
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4612
4719
|
},
|
4613
4720
|
.name = name,
|
4614
|
-
.name_loc =
|
4615
|
-
.operator_loc =
|
4721
|
+
.name_loc = PM_LOCATION_NULL_VALUE(parser),
|
4722
|
+
.operator_loc = PM_LOCATION_NULL_VALUE(parser),
|
4616
4723
|
.value = value
|
4617
4724
|
};
|
4618
4725
|
|
@@ -4889,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
|
|
4889
4996
|
}
|
4890
4997
|
|
4891
4998
|
/**
|
4892
|
-
* Allocate and initialize a new
|
4999
|
+
* Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
|
4893
5000
|
* token.
|
4894
5001
|
*/
|
4895
5002
|
static pm_rational_node_t *
|
@@ -4900,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
|
|
4900
5007
|
*node = (pm_rational_node_t) {
|
4901
5008
|
{
|
4902
5009
|
.type = PM_RATIONAL_NODE,
|
4903
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5010
|
+
.flags = base | PM_NODE_FLAG_STATIC_LITERAL,
|
4904
5011
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4905
5012
|
},
|
4906
|
-
.
|
4907
|
-
|
4908
|
-
.start = token->start,
|
4909
|
-
.end = token->end - 1
|
4910
|
-
}))
|
5013
|
+
.numerator = { 0 },
|
5014
|
+
.denominator = { .value = 1, 0 }
|
4911
5015
|
};
|
4912
5016
|
|
5017
|
+
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
|
5018
|
+
switch (base) {
|
5019
|
+
case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
|
5020
|
+
case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
|
5021
|
+
case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
|
5022
|
+
case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
|
5023
|
+
default: assert(false && "unreachable"); break;
|
5024
|
+
}
|
5025
|
+
|
5026
|
+
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
|
5027
|
+
|
4913
5028
|
return node;
|
4914
5029
|
}
|
4915
5030
|
|
@@ -5013,9 +5128,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
|
|
5013
5128
|
},
|
5014
5129
|
.name = target->name,
|
5015
5130
|
.name_loc = target->base.location,
|
5016
|
-
.
|
5131
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5017
5132
|
.value = value,
|
5018
|
-
.
|
5133
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
5019
5134
|
};
|
5020
5135
|
|
5021
5136
|
return node;
|
@@ -5407,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
|
|
5407
5522
|
node->base.location.end = closing->end;
|
5408
5523
|
}
|
5409
5524
|
|
5525
|
+
/**
|
5526
|
+
* Create a local variable read that is reading the implicit 'it' variable.
|
5527
|
+
*/
|
5528
|
+
static pm_it_local_variable_read_node_t *
|
5529
|
+
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
5530
|
+
pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
|
5531
|
+
|
5532
|
+
*node = (pm_it_local_variable_read_node_t) {
|
5533
|
+
{
|
5534
|
+
.type = PM_IT_LOCAL_VARIABLE_READ_NODE,
|
5535
|
+
.location = PM_LOCATION_TOKEN_VALUE(name)
|
5536
|
+
}
|
5537
|
+
};
|
5538
|
+
|
5539
|
+
return node;
|
5540
|
+
}
|
5541
|
+
|
5410
5542
|
/**
|
5411
5543
|
* Allocate and initialize a new ItParametersNode node.
|
5412
5544
|
*/
|
@@ -5609,10 +5741,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5609
5741
|
}
|
5610
5742
|
},
|
5611
5743
|
.name_loc = target->location,
|
5612
|
-
.
|
5744
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5613
5745
|
.value = value,
|
5614
5746
|
.name = name,
|
5615
|
-
.
|
5747
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
5616
5748
|
.depth = depth
|
5617
5749
|
};
|
5618
5750
|
|
@@ -5719,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
|
5719
5851
|
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
5720
5852
|
}
|
5721
5853
|
|
5722
|
-
/**
|
5723
|
-
* Returns true if the given node is `it` default parameter.
|
5724
|
-
*/
|
5725
|
-
static inline bool
|
5726
|
-
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
5727
|
-
// Check if it's a local variable reference
|
5728
|
-
if (node->type != PM_CALL_NODE) {
|
5729
|
-
return false;
|
5730
|
-
}
|
5731
|
-
|
5732
|
-
// Check if it's a variable call
|
5733
|
-
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
5734
|
-
if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
|
5735
|
-
return false;
|
5736
|
-
}
|
5737
|
-
|
5738
|
-
// Check if it's called `it`
|
5739
|
-
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
5740
|
-
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
5741
|
-
return pm_token_is_it(constant->start, constant->start + constant->length);
|
5742
|
-
}
|
5743
|
-
|
5744
5854
|
/**
|
5745
5855
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
5746
5856
|
* are of the form /^_\d$/).
|
@@ -6891,7 +7001,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
|
|
6891
7001
|
case PM_REDO_NODE:
|
6892
7002
|
case PM_RETRY_NODE:
|
6893
7003
|
case PM_RETURN_NODE:
|
6894
|
-
pm_parser_warn_node(parser,
|
7004
|
+
pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
|
6895
7005
|
break;
|
6896
7006
|
default:
|
6897
7007
|
break;
|
@@ -7300,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
|
|
7300
7410
|
{
|
7301
7411
|
.type = PM_SYMBOL_NODE,
|
7302
7412
|
.flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
|
7303
|
-
.location =
|
7413
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7304
7414
|
},
|
7305
|
-
.value_loc =
|
7415
|
+
.value_loc = PM_LOCATION_NULL_VALUE(parser),
|
7306
7416
|
.unescaped = { 0 }
|
7307
7417
|
};
|
7308
7418
|
|
@@ -7703,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7703
7813
|
*node = (pm_while_node_t) {
|
7704
7814
|
{
|
7705
7815
|
.type = PM_WHILE_NODE,
|
7706
|
-
.location =
|
7816
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7707
7817
|
},
|
7708
|
-
.keyword_loc =
|
7709
|
-
.closing_loc =
|
7818
|
+
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7819
|
+
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7710
7820
|
.predicate = predicate,
|
7711
7821
|
.statements = statements
|
7712
7822
|
};
|
@@ -7861,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
|
|
7861
7971
|
return constant_id;
|
7862
7972
|
}
|
7863
7973
|
|
7864
|
-
/**
|
7865
|
-
* Create a local variable read that is reading the implicit 'it' variable.
|
7866
|
-
*/
|
7867
|
-
static pm_local_variable_read_node_t *
|
7868
|
-
pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
|
7869
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
|
7870
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
7871
|
-
return NULL;
|
7872
|
-
}
|
7873
|
-
|
7874
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
|
7875
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
7876
|
-
return NULL;
|
7877
|
-
}
|
7878
|
-
|
7879
|
-
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
|
7880
|
-
|
7881
|
-
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
7882
|
-
pm_parser_local_add(parser, name_id, name->start, name->end, 0);
|
7883
|
-
|
7884
|
-
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
|
7885
|
-
}
|
7886
|
-
|
7887
|
-
/**
|
7888
|
-
* Convert a `it` variable call node to a node for `it` default parameter.
|
7889
|
-
*/
|
7890
|
-
static pm_node_t *
|
7891
|
-
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7892
|
-
if (
|
7893
|
-
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7894
|
-
!parser->current_scope->closed &&
|
7895
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7896
|
-
pm_node_is_it(parser, node)
|
7897
|
-
) {
|
7898
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
7899
|
-
|
7900
|
-
if (read != NULL) {
|
7901
|
-
pm_node_destroy(parser, node);
|
7902
|
-
node = (pm_node_t *) read;
|
7903
|
-
}
|
7904
|
-
}
|
7905
|
-
|
7906
|
-
return node;
|
7907
|
-
}
|
7908
|
-
|
7909
7974
|
/**
|
7910
7975
|
* Add a parameter name to the current scope and check whether the name of the
|
7911
7976
|
* parameter is unique or not.
|
@@ -7941,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
7941
8006
|
pm_scope_t *scope = parser->current_scope;
|
7942
8007
|
parser->current_scope = scope->previous;
|
7943
8008
|
pm_locals_free(&scope->locals);
|
8009
|
+
pm_node_list_free(&scope->implicit_parameters);
|
7944
8010
|
xfree(scope);
|
7945
8011
|
}
|
7946
8012
|
|
@@ -8012,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
|
|
8012
8078
|
* is beyond the end of the source then return '\0'.
|
8013
8079
|
*/
|
8014
8080
|
static inline uint8_t
|
8015
|
-
peek_at(pm_parser_t *parser, const uint8_t *cursor) {
|
8081
|
+
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
|
8016
8082
|
if (cursor < parser->end) {
|
8017
8083
|
return *cursor;
|
8018
8084
|
} else {
|
@@ -8035,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
|
|
8035
8101
|
* that position is beyond the end of the source then return '\0'.
|
8036
8102
|
*/
|
8037
8103
|
static inline uint8_t
|
8038
|
-
peek(pm_parser_t *parser) {
|
8104
|
+
peek(const pm_parser_t *parser) {
|
8039
8105
|
return peek_at(parser, parser->current.end);
|
8040
8106
|
}
|
8041
8107
|
|
@@ -8100,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
8100
8166
|
return memchr(cursor, '\n', (size_t) length);
|
8101
8167
|
}
|
8102
8168
|
|
8169
|
+
/**
|
8170
|
+
* This is equivalent to the predicate of warn_balanced in CRuby.
|
8171
|
+
*/
|
8172
|
+
static inline bool
|
8173
|
+
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
|
8174
|
+
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
|
8175
|
+
}
|
8176
|
+
|
8103
8177
|
/**
|
8104
8178
|
* Here we're going to check if this is a "magic" comment, and perform whatever
|
8105
8179
|
* actions are necessary for it here.
|
@@ -8339,7 +8413,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|
8339
8413
|
// If we have hit a ractor pragma, attempt to lex that.
|
8340
8414
|
uint32_t value_length = (uint32_t) (value_end - value_start);
|
8341
8415
|
if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
|
8342
|
-
|
8416
|
+
const uint8_t *cursor = parser->current.start;
|
8417
|
+
while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
|
8418
|
+
|
8419
|
+
if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
|
8420
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
|
8421
|
+
} else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
|
8343
8422
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
|
8344
8423
|
} else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
|
8345
8424
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
|
@@ -8796,6 +8875,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8796
8875
|
type = lex_optional_float_suffix(parser, seen_e);
|
8797
8876
|
}
|
8798
8877
|
|
8878
|
+
// At this point we have a completed number, but we want to provide the user
|
8879
|
+
// with a good experience if they put an additional .xxx fractional
|
8880
|
+
// component on the end, so we'll check for that here.
|
8881
|
+
if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
|
8882
|
+
const uint8_t *fraction_start = parser->current.end;
|
8883
|
+
const uint8_t *fraction_end = parser->current.end + 2;
|
8884
|
+
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
|
8885
|
+
pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
|
8886
|
+
}
|
8887
|
+
|
8799
8888
|
return type;
|
8800
8889
|
}
|
8801
8890
|
|
@@ -8925,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8925
9014
|
// If we get here, then we have a $ followed by something that
|
8926
9015
|
// isn't recognized as a global variable.
|
8927
9016
|
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8928
|
-
|
8929
|
-
|
9017
|
+
const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9018
|
+
PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
|
8930
9019
|
}
|
8931
9020
|
|
8932
9021
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -9297,12 +9386,20 @@ escape_hexadecimal_digit(const uint8_t value) {
|
|
9297
9386
|
* validated.
|
9298
9387
|
*/
|
9299
9388
|
static inline uint32_t
|
9300
|
-
escape_unicode(const uint8_t *string, size_t length) {
|
9389
|
+
escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
9301
9390
|
uint32_t value = 0;
|
9302
9391
|
for (size_t index = 0; index < length; index++) {
|
9303
9392
|
if (index != 0) value <<= 4;
|
9304
9393
|
value |= escape_hexadecimal_digit(string[index]);
|
9305
9394
|
}
|
9395
|
+
|
9396
|
+
// Here we're going to verify that the value is actually a valid Unicode
|
9397
|
+
// codepoint and not a surrogate (0xD800 through 0xDFFF).
|
9398
|
+
if (value >= 0xD800 && value <= 0xDFFF) {
|
9399
|
+
pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9400
|
+
return 0xFFFD;
|
9401
|
+
}
|
9402
|
+
|
9306
9403
|
return value;
|
9307
9404
|
}
|
9308
9405
|
|
@@ -9311,7 +9408,7 @@ escape_unicode(const uint8_t *string, size_t length) {
|
|
9311
9408
|
*/
|
9312
9409
|
static inline uint8_t
|
9313
9410
|
escape_byte(uint8_t value, const uint8_t flags) {
|
9314
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) value &=
|
9411
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
|
9315
9412
|
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
9316
9413
|
return value;
|
9317
9414
|
}
|
@@ -9411,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
|
9411
9508
|
static inline void
|
9412
9509
|
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
|
9413
9510
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9414
|
-
|
9415
|
-
|
9416
|
-
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
9417
|
-
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
9418
|
-
|
9419
|
-
if (byte1 >= 0xA) {
|
9420
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
9421
|
-
} else {
|
9422
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
|
9423
|
-
}
|
9424
|
-
|
9425
|
-
if (byte2 >= 0xA) {
|
9426
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
9427
|
-
} else {
|
9428
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
|
9429
|
-
}
|
9511
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
|
9430
9512
|
}
|
9431
9513
|
|
9432
9514
|
escape_write_byte_encoded(parser, buffer, byte);
|
@@ -9461,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9461
9543
|
switch (peek(parser)) {
|
9462
9544
|
case '\\': {
|
9463
9545
|
parser->current.end++;
|
9464
|
-
|
9546
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
9465
9547
|
return;
|
9466
9548
|
}
|
9467
9549
|
case '\'': {
|
9468
9550
|
parser->current.end++;
|
9469
|
-
|
9551
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
|
9470
9552
|
return;
|
9471
9553
|
}
|
9472
9554
|
case 'a': {
|
9473
9555
|
parser->current.end++;
|
9474
|
-
|
9556
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
|
9475
9557
|
return;
|
9476
9558
|
}
|
9477
9559
|
case 'b': {
|
9478
9560
|
parser->current.end++;
|
9479
|
-
|
9561
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
|
9480
9562
|
return;
|
9481
9563
|
}
|
9482
9564
|
case 'e': {
|
9483
9565
|
parser->current.end++;
|
9484
|
-
|
9566
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
|
9485
9567
|
return;
|
9486
9568
|
}
|
9487
9569
|
case 'f': {
|
9488
9570
|
parser->current.end++;
|
9489
|
-
|
9571
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
|
9490
9572
|
return;
|
9491
9573
|
}
|
9492
9574
|
case 'n': {
|
9493
9575
|
parser->current.end++;
|
9494
|
-
|
9576
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
|
9495
9577
|
return;
|
9496
9578
|
}
|
9497
9579
|
case 'r': {
|
9498
9580
|
parser->current.end++;
|
9499
|
-
|
9581
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
|
9500
9582
|
return;
|
9501
9583
|
}
|
9502
9584
|
case 's': {
|
9503
9585
|
parser->current.end++;
|
9504
|
-
|
9586
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
|
9505
9587
|
return;
|
9506
9588
|
}
|
9507
9589
|
case 't': {
|
9508
9590
|
parser->current.end++;
|
9509
|
-
|
9591
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
|
9510
9592
|
return;
|
9511
9593
|
}
|
9512
9594
|
case 'v': {
|
9513
9595
|
parser->current.end++;
|
9514
|
-
|
9596
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
|
9515
9597
|
return;
|
9516
9598
|
}
|
9517
9599
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
@@ -9528,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9528
9610
|
}
|
9529
9611
|
}
|
9530
9612
|
|
9531
|
-
|
9613
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9532
9614
|
return;
|
9533
9615
|
}
|
9534
9616
|
case 'x': {
|
@@ -9547,8 +9629,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9547
9629
|
parser->current.end++;
|
9548
9630
|
}
|
9549
9631
|
|
9632
|
+
value = escape_byte(value, flags);
|
9550
9633
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9551
|
-
|
9634
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9635
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
|
9636
|
+
} else {
|
9637
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9638
|
+
}
|
9552
9639
|
}
|
9553
9640
|
|
9554
9641
|
escape_write_byte_encoded(parser, buffer, value);
|
@@ -9580,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9580
9667
|
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
9581
9668
|
} else if (hexadecimal_length == 0) {
|
9582
9669
|
// there are not hexadecimal characters
|
9583
|
-
pm_parser_err(parser,
|
9670
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9671
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
9584
9672
|
return;
|
9585
9673
|
}
|
9586
9674
|
|
@@ -9590,7 +9678,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9590
9678
|
extra_codepoints_start = unicode_start;
|
9591
9679
|
}
|
9592
9680
|
|
9593
|
-
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
9681
|
+
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9594
9682
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9595
9683
|
|
9596
9684
|
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
@@ -9615,7 +9703,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9615
9703
|
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9616
9704
|
|
9617
9705
|
if (length == 4) {
|
9618
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9706
|
+
uint32_t value = escape_unicode(parser, parser->current.end, 4);
|
9619
9707
|
|
9620
9708
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9621
9709
|
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
@@ -9651,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9651
9739
|
return;
|
9652
9740
|
}
|
9653
9741
|
parser->current.end++;
|
9742
|
+
|
9743
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9744
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9745
|
+
return;
|
9746
|
+
}
|
9747
|
+
|
9654
9748
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9655
9749
|
return;
|
9656
9750
|
case ' ':
|
@@ -9678,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9678
9772
|
case 'C': {
|
9679
9773
|
parser->current.end++;
|
9680
9774
|
if (peek(parser) != '-') {
|
9681
|
-
|
9775
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9776
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9682
9777
|
return;
|
9683
9778
|
}
|
9684
9779
|
|
@@ -9701,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9701
9796
|
return;
|
9702
9797
|
}
|
9703
9798
|
parser->current.end++;
|
9799
|
+
|
9800
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9801
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9802
|
+
return;
|
9803
|
+
}
|
9804
|
+
|
9704
9805
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9705
9806
|
return;
|
9706
9807
|
case ' ':
|
@@ -9715,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9715
9816
|
return;
|
9716
9817
|
default: {
|
9717
9818
|
if (!char_is_ascii_printable(peeked)) {
|
9718
|
-
|
9819
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9820
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9719
9821
|
return;
|
9720
9822
|
}
|
9721
9823
|
|
@@ -9728,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9728
9830
|
case 'M': {
|
9729
9831
|
parser->current.end++;
|
9730
9832
|
if (peek(parser) != '-') {
|
9731
|
-
|
9833
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9834
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9732
9835
|
return;
|
9733
9836
|
}
|
9734
9837
|
|
@@ -9746,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9746
9849
|
return;
|
9747
9850
|
}
|
9748
9851
|
parser->current.end++;
|
9852
|
+
|
9853
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9854
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9855
|
+
return;
|
9856
|
+
}
|
9857
|
+
|
9749
9858
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
|
9750
9859
|
return;
|
9751
9860
|
case ' ':
|
@@ -9760,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9760
9869
|
return;
|
9761
9870
|
default:
|
9762
9871
|
if (!char_is_ascii_printable(peeked)) {
|
9763
|
-
|
9872
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9873
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9764
9874
|
return;
|
9765
9875
|
}
|
9766
9876
|
|
@@ -10721,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10721
10831
|
type = PM_TOKEN_USTAR_STAR;
|
10722
10832
|
} else if (lex_state_beg_p(parser)) {
|
10723
10833
|
type = PM_TOKEN_USTAR_STAR;
|
10834
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10835
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
|
10724
10836
|
}
|
10725
10837
|
|
10726
10838
|
if (lex_state_operator_p(parser)) {
|
@@ -10744,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10744
10856
|
type = PM_TOKEN_USTAR;
|
10745
10857
|
} else if (lex_state_beg_p(parser)) {
|
10746
10858
|
type = PM_TOKEN_USTAR;
|
10859
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10860
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
|
10747
10861
|
}
|
10748
10862
|
|
10749
10863
|
if (lex_state_operator_p(parser)) {
|
@@ -10860,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10860
10974
|
// If we have quotes, then we're going to go until we find the
|
10861
10975
|
// end quote.
|
10862
10976
|
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
|
10977
|
+
if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
|
10863
10978
|
parser->current.end++;
|
10864
10979
|
}
|
10865
10980
|
}
|
@@ -10917,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10917
11032
|
LEX(PM_TOKEN_LESS_LESS_EQUAL);
|
10918
11033
|
}
|
10919
11034
|
|
11035
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11036
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
|
11037
|
+
}
|
11038
|
+
|
10920
11039
|
if (lex_state_operator_p(parser)) {
|
10921
11040
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
10922
11041
|
} else {
|
@@ -11030,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
|
|
11030
11149
|
type = PM_TOKEN_UAMPERSAND;
|
11031
11150
|
} else if (lex_state_beg_p(parser)) {
|
11032
11151
|
type = PM_TOKEN_UAMPERSAND;
|
11152
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
11153
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
|
11033
11154
|
}
|
11034
11155
|
|
11035
11156
|
if (lex_state_operator_p(parser)) {
|
@@ -11104,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11104
11225
|
LEX(PM_TOKEN_UPLUS);
|
11105
11226
|
}
|
11106
11227
|
|
11228
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11229
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
|
11230
|
+
}
|
11231
|
+
|
11107
11232
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11108
11233
|
LEX(PM_TOKEN_PLUS);
|
11109
11234
|
}
|
@@ -11141,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11141
11266
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
11142
11267
|
}
|
11143
11268
|
|
11269
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11270
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
|
11271
|
+
}
|
11272
|
+
|
11144
11273
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11145
11274
|
LEX(PM_TOKEN_MINUS);
|
11146
11275
|
}
|
@@ -11239,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11239
11368
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
11240
11369
|
}
|
11241
11370
|
|
11371
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11372
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
|
11373
|
+
}
|
11374
|
+
|
11242
11375
|
if (lex_state_operator_p(parser)) {
|
11243
11376
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11244
11377
|
} else {
|
@@ -11274,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11274
11407
|
// operator because we don't want to move into the string
|
11275
11408
|
// lex mode unnecessarily.
|
11276
11409
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
11277
|
-
pm_parser_err_current(parser,
|
11410
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
|
11278
11411
|
LEX(PM_TOKEN_PERCENT);
|
11279
11412
|
}
|
11280
11413
|
|
@@ -11293,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11293
11426
|
|
11294
11427
|
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
11295
11428
|
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
11296
|
-
|
11297
|
-
if (parser->current.end < parser->end) {
|
11298
|
-
LEX(PM_TOKEN_STRING_BEGIN);
|
11299
|
-
}
|
11429
|
+
LEX(PM_TOKEN_STRING_BEGIN);
|
11300
11430
|
}
|
11301
11431
|
|
11302
11432
|
// Delimiters for %-literals cannot be alphanumeric. We
|
@@ -11423,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11423
11553
|
}
|
11424
11554
|
}
|
11425
11555
|
|
11556
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11557
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
|
11558
|
+
}
|
11559
|
+
|
11426
11560
|
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
|
11427
11561
|
LEX(PM_TOKEN_PERCENT);
|
11428
11562
|
}
|
@@ -12233,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
|
|
12233
12367
|
|
12234
12368
|
// If we are immediately following a newline and we have hit the
|
12235
12369
|
// terminator, then we need to return the ending of the heredoc.
|
12236
|
-
if (
|
12370
|
+
if (current_token_starts_line(parser)) {
|
12237
12371
|
const uint8_t *start = parser->current.start;
|
12238
|
-
|
12372
|
+
|
12373
|
+
if (!line_continuation && (start + ident_length <= parser->end)) {
|
12239
12374
|
const uint8_t *newline = next_newline(start, parser->end - start);
|
12240
12375
|
const uint8_t *ident_end = newline;
|
12241
12376
|
const uint8_t *terminator_end = newline;
|
@@ -12391,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
|
|
12391
12526
|
}
|
12392
12527
|
|
12393
12528
|
parser->current.end = breakpoint + 1;
|
12394
|
-
|
12395
|
-
|
12396
|
-
pm_token_buffer_flush(parser, &token_buffer);
|
12397
|
-
LEX(PM_TOKEN_STRING_CONTENT);
|
12398
|
-
}
|
12529
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
12530
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
12399
12531
|
}
|
12400
12532
|
|
12401
12533
|
// Otherwise we hit a newline and it wasn't followed by
|
@@ -13030,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
|
13030
13162
|
return (pm_node_t *) result;
|
13031
13163
|
}
|
13032
13164
|
|
13165
|
+
/**
|
13166
|
+
* When an implicit local variable is written to or targeted, it becomes a
|
13167
|
+
* regular, named local variable. This function removes it from the list of
|
13168
|
+
* implicit parameters when that happens.
|
13169
|
+
*/
|
13170
|
+
static void
|
13171
|
+
parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
|
13172
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
13173
|
+
|
13174
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
13175
|
+
if (implicit_parameters->nodes[index] == node) {
|
13176
|
+
// If the node is not the last one in the list, we need to shift the
|
13177
|
+
// remaining nodes down to fill the gap. This is extremely unlikely
|
13178
|
+
// to happen.
|
13179
|
+
if (index != implicit_parameters->size - 1) {
|
13180
|
+
memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
|
13181
|
+
}
|
13182
|
+
|
13183
|
+
implicit_parameters->size--;
|
13184
|
+
break;
|
13185
|
+
}
|
13186
|
+
}
|
13187
|
+
}
|
13188
|
+
|
13033
13189
|
/**
|
13034
13190
|
* Convert the given node into a valid target node.
|
13191
|
+
*
|
13192
|
+
* @param multiple Whether or not this target is part of a larger set of
|
13193
|
+
* targets. If it is, then the &. operator is not allowed.
|
13194
|
+
* @param splat Whether or not this target is a child of a splat target. If it
|
13195
|
+
* is, then fewer patterns are allowed.
|
13035
13196
|
*/
|
13036
13197
|
static pm_node_t *
|
13037
|
-
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13198
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
|
13038
13199
|
switch (PM_NODE_TYPE(target)) {
|
13039
13200
|
case PM_MISSING_NODE:
|
13040
13201
|
return target;
|
@@ -13080,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13080
13241
|
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
|
13081
13242
|
return target;
|
13082
13243
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13083
|
-
|
13244
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13245
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
|
13246
|
+
parse_target_implicit_parameter(parser, target);
|
13247
|
+
}
|
13084
13248
|
|
13085
13249
|
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
|
13086
13250
|
uint32_t name = cast->name;
|
@@ -13092,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13092
13256
|
|
13093
13257
|
return target;
|
13094
13258
|
}
|
13259
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13260
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13261
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13262
|
+
|
13263
|
+
parse_target_implicit_parameter(parser, target);
|
13264
|
+
pm_node_destroy(parser, target);
|
13265
|
+
|
13266
|
+
return node;
|
13267
|
+
}
|
13095
13268
|
case PM_INSTANCE_VARIABLE_READ_NODE:
|
13096
13269
|
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
|
13097
13270
|
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
|
13098
13271
|
return target;
|
13099
13272
|
case PM_MULTI_TARGET_NODE:
|
13273
|
+
if (splat_parent) {
|
13274
|
+
// Multi target is not accepted in all positions. If this is one
|
13275
|
+
// of them, then we need to add an error.
|
13276
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
13277
|
+
}
|
13278
|
+
|
13100
13279
|
return target;
|
13101
13280
|
case PM_SPLAT_NODE: {
|
13102
13281
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
13103
13282
|
|
13104
13283
|
if (splat->expression != NULL) {
|
13105
|
-
splat->expression = parse_target(parser, splat->expression, multiple);
|
13284
|
+
splat->expression = parse_target(parser, splat->expression, multiple, true);
|
13106
13285
|
}
|
13107
13286
|
|
13108
13287
|
return (pm_node_t *) splat;
|
@@ -13172,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13172
13351
|
*/
|
13173
13352
|
static pm_node_t *
|
13174
13353
|
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13175
|
-
pm_node_t *result = parse_target(parser, target, multiple);
|
13354
|
+
pm_node_t *result = parse_target(parser, target, multiple, false);
|
13176
13355
|
|
13177
|
-
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13356
|
+
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13357
|
+
// parens after the targets.
|
13178
13358
|
if (
|
13179
13359
|
!match1(parser, PM_TOKEN_EQUAL) &&
|
13180
13360
|
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
|
@@ -13244,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13244
13424
|
return (pm_node_t *) node;
|
13245
13425
|
}
|
13246
13426
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13247
|
-
pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
|
13248
13427
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
13249
13428
|
|
13250
13429
|
pm_constant_id_t name = local_read->name;
|
13430
|
+
pm_location_t name_loc = target->location;
|
13431
|
+
|
13251
13432
|
uint32_t depth = local_read->depth;
|
13252
|
-
|
13433
|
+
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
|
13253
13434
|
|
13254
|
-
|
13435
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13436
|
+
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
|
13437
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
|
13438
|
+
parse_target_implicit_parameter(parser, target);
|
13439
|
+
}
|
13440
|
+
|
13441
|
+
pm_locals_unread(&scope->locals, name);
|
13255
13442
|
pm_node_destroy(parser, target);
|
13256
13443
|
|
13257
13444
|
return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
|
13258
13445
|
}
|
13446
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13447
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13448
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
|
13449
|
+
|
13450
|
+
parse_target_implicit_parameter(parser, target);
|
13451
|
+
pm_node_destroy(parser, target);
|
13452
|
+
|
13453
|
+
return node;
|
13454
|
+
}
|
13259
13455
|
case PM_INSTANCE_VARIABLE_READ_NODE: {
|
13260
13456
|
pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
|
13261
13457
|
pm_node_destroy(parser, target);
|
@@ -13409,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13409
13605
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13410
13606
|
|
13411
13607
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13412
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13608
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
|
13413
13609
|
|
13414
13610
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13415
13611
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13425,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13425
13621
|
|
13426
13622
|
if (token_begins_expression_p(parser->current.type)) {
|
13427
13623
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13428
|
-
name = parse_target(parser, name, true);
|
13624
|
+
name = parse_target(parser, name, true, true);
|
13429
13625
|
}
|
13430
13626
|
|
13431
13627
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13433,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13433
13629
|
has_rest = true;
|
13434
13630
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13435
13631
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13436
|
-
target = parse_target(parser, target, true);
|
13632
|
+
target = parse_target(parser, target, true, false);
|
13437
13633
|
|
13438
13634
|
pm_multi_target_node_targets_append(parser, result, target);
|
13439
13635
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13470,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
13470
13666
|
*/
|
13471
13667
|
static pm_statements_node_t *
|
13472
13668
|
parse_statements(pm_parser_t *parser, pm_context_t context) {
|
13473
|
-
// First, skip past any optional terminators that might be at the beginning
|
13474
|
-
// the statements.
|
13669
|
+
// First, skip past any optional terminators that might be at the beginning
|
13670
|
+
// of the statements.
|
13475
13671
|
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
|
13476
13672
|
|
13477
13673
|
// If we have a terminator, then we can just return NULL.
|
@@ -13487,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13487
13683
|
pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
|
13488
13684
|
pm_statements_node_body_append(parser, statements, node);
|
13489
13685
|
|
13490
|
-
// If we're recovering from a syntax error, then we need to stop parsing
|
13491
|
-
// statements now.
|
13686
|
+
// If we're recovering from a syntax error, then we need to stop parsing
|
13687
|
+
// the statements now.
|
13492
13688
|
if (parser->recovering) {
|
13493
|
-
// If this is the level of context where the recovery has happened,
|
13494
|
-
// we can mark the parser as done recovering.
|
13689
|
+
// If this is the level of context where the recovery has happened,
|
13690
|
+
// then we can mark the parser as done recovering.
|
13495
13691
|
if (context_terminator(context, &parser->current)) parser->recovering = false;
|
13496
13692
|
break;
|
13497
13693
|
}
|
13498
13694
|
|
13499
|
-
// If we have a terminator, then we will parse all consecutive
|
13500
|
-
// and then continue parsing the statements list.
|
13695
|
+
// If we have a terminator, then we will parse all consecutive
|
13696
|
+
// terminators and then continue parsing the statements list.
|
13501
13697
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13502
|
-
// If we have a terminator, then we will continue parsing the
|
13503
|
-
// list.
|
13698
|
+
// If we have a terminator, then we will continue parsing the
|
13699
|
+
// statements list.
|
13504
13700
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13505
13701
|
if (context_terminator(context, &parser->current)) break;
|
13506
13702
|
|
@@ -13508,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13508
13704
|
continue;
|
13509
13705
|
}
|
13510
13706
|
|
13511
|
-
// At this point we have a list of statements that are not terminated by
|
13512
|
-
// newline or semicolon. At this point we need to check if we're at
|
13513
|
-
// of the statements list. If we are, then we should break out
|
13707
|
+
// At this point we have a list of statements that are not terminated by
|
13708
|
+
// a newline or semicolon. At this point we need to check if we're at
|
13709
|
+
// the end of the statements list. If we are, then we should break out
|
13710
|
+
// of the loop.
|
13514
13711
|
if (context_terminator(context, &parser->current)) break;
|
13515
13712
|
|
13516
13713
|
// At this point, we have a syntax error, because the statement was not
|
13517
13714
|
// terminated by a newline or semicolon, and we're not at the end of the
|
13518
|
-
// statements list. Ideally we should scan forward to determine if we
|
13519
|
-
// insert a missing terminator or break out of parsing the
|
13520
|
-
// at this point.
|
13715
|
+
// statements list. Ideally we should scan forward to determine if we
|
13716
|
+
// should insert a missing terminator or break out of parsing the
|
13717
|
+
// statements list at this point.
|
13521
13718
|
//
|
13522
|
-
// We don't have that yet, so instead we'll do a more naive approach. If
|
13523
|
-
// were unable to parse an expression, then we will skip past this
|
13524
|
-
// continue parsing the statements list. Otherwise we'll add
|
13525
|
-
// continue parsing the statements list.
|
13719
|
+
// We don't have that yet, so instead we'll do a more naive approach. If
|
13720
|
+
// we were unable to parse an expression, then we will skip past this
|
13721
|
+
// token and continue parsing the statements list. Otherwise we'll add
|
13722
|
+
// an error and continue parsing the statements list.
|
13526
13723
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13527
13724
|
parser_lex(parser);
|
13528
13725
|
|
13529
13726
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13530
13727
|
if (context_terminator(context, &parser->current)) break;
|
13531
|
-
} else if (!
|
13728
|
+
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
13532
13729
|
// This is an inlined version of accept1 because the error that we
|
13533
13730
|
// want to add has varargs. If this happens again, we should
|
13534
13731
|
// probably extract a helper function.
|
@@ -13550,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13550
13747
|
*/
|
13551
13748
|
static void
|
13552
13749
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13553
|
-
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13750
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
|
13554
13751
|
|
13555
13752
|
if (duplicated != NULL) {
|
13556
13753
|
pm_buffer_t buffer = { 0 };
|
@@ -13576,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13576
13773
|
*/
|
13577
13774
|
static void
|
13578
13775
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13579
|
-
|
13776
|
+
pm_node_t *previous;
|
13777
|
+
|
13778
|
+
if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
|
13580
13779
|
pm_diagnostic_list_append_format(
|
13581
13780
|
&parser->warning_list,
|
13582
13781
|
node->location.start,
|
13583
13782
|
node->location.end,
|
13584
13783
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
13585
|
-
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
|
13784
|
+
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
|
13785
|
+
pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
|
13586
13786
|
);
|
13587
13787
|
}
|
13588
13788
|
}
|
@@ -14036,31 +14236,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
|
|
14036
14236
|
* Check if current parameter follows valid parameters ordering. If not it adds
|
14037
14237
|
* an error to the list without stopping the parsing, otherwise sets the
|
14038
14238
|
* parameters state to the one corresponding to the current parameter.
|
14239
|
+
*
|
14240
|
+
* It returns true if it was successful, and false otherwise.
|
14039
14241
|
*/
|
14040
|
-
static
|
14242
|
+
static bool
|
14041
14243
|
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
|
14042
14244
|
pm_parameters_order_t state = parameters_ordering[token->type];
|
14043
|
-
if (state == PM_PARAMETERS_NO_CHANGE) return;
|
14245
|
+
if (state == PM_PARAMETERS_NO_CHANGE) return true;
|
14044
14246
|
|
14045
14247
|
// If we see another ordered argument after a optional argument
|
14046
14248
|
// we only continue parsing ordered arguments until we stop seeing ordered arguments.
|
14047
14249
|
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
14048
14250
|
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
|
14049
|
-
return;
|
14251
|
+
return true;
|
14050
14252
|
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
14051
|
-
return;
|
14253
|
+
return true;
|
14052
14254
|
}
|
14053
14255
|
|
14054
14256
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
14055
14257
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
14056
|
-
|
14057
|
-
|
14058
|
-
|
14258
|
+
return false;
|
14259
|
+
} else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
|
14260
|
+
pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
|
14261
|
+
return false;
|
14262
|
+
} else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
14059
14263
|
// We know what transition we failed on, so we can provide a better error here.
|
14060
14264
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
14061
|
-
|
14062
|
-
*current = state;
|
14265
|
+
return false;
|
14063
14266
|
}
|
14267
|
+
|
14268
|
+
if (state < *current) *current = state;
|
14269
|
+
return true;
|
14064
14270
|
}
|
14065
14271
|
|
14066
14272
|
/**
|
@@ -14129,27 +14335,22 @@ parse_parameters(
|
|
14129
14335
|
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
14130
14336
|
}
|
14131
14337
|
|
14132
|
-
|
14133
|
-
|
14134
|
-
parser_lex(parser);
|
14338
|
+
bool succeeded = update_parameter_state(parser, &parser->current, &order);
|
14339
|
+
parser_lex(parser);
|
14135
14340
|
|
14136
|
-
|
14341
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
|
14342
|
+
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
14137
14343
|
|
14138
|
-
|
14139
|
-
|
14140
|
-
|
14141
|
-
|
14142
|
-
|
14143
|
-
|
14144
|
-
|
14145
|
-
params->keyword_rest = NULL;
|
14146
|
-
}
|
14147
|
-
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
14148
|
-
} else {
|
14149
|
-
update_parameter_state(parser, &parser->current, &order);
|
14150
|
-
parser_lex(parser);
|
14344
|
+
if (params->keyword_rest != NULL) {
|
14345
|
+
// If we already have a keyword rest parameter, then we replace it with the
|
14346
|
+
// forwarding parameter and move the keyword rest parameter to the posts list.
|
14347
|
+
pm_node_t *keyword_rest = params->keyword_rest;
|
14348
|
+
pm_parameters_node_posts_append(params, keyword_rest);
|
14349
|
+
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
14350
|
+
params->keyword_rest = NULL;
|
14151
14351
|
}
|
14152
14352
|
|
14353
|
+
pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
|
14153
14354
|
break;
|
14154
14355
|
}
|
14155
14356
|
case PM_TOKEN_CLASS_VARIABLE:
|
@@ -14193,7 +14394,7 @@ parse_parameters(
|
|
14193
14394
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14194
14395
|
|
14195
14396
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
|
14196
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14397
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14197
14398
|
|
14198
14399
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
14199
14400
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
@@ -14206,7 +14407,7 @@ parse_parameters(
|
|
14206
14407
|
// If the value of the parameter increased the number of
|
14207
14408
|
// reads of that parameter, then we need to warn that we
|
14208
14409
|
// have a circular definition.
|
14209
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14410
|
+
if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14210
14411
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
14211
14412
|
}
|
14212
14413
|
|
@@ -14244,6 +14445,12 @@ parse_parameters(
|
|
14244
14445
|
pm_token_t local = name;
|
14245
14446
|
local.end -= 1;
|
14246
14447
|
|
14448
|
+
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
|
14449
|
+
pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
14450
|
+
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
|
14451
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
|
14452
|
+
}
|
14453
|
+
|
14247
14454
|
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
14248
14455
|
pm_parser_local_add_token(parser, &local, 1);
|
14249
14456
|
|
@@ -14279,10 +14486,10 @@ parse_parameters(
|
|
14279
14486
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14280
14487
|
|
14281
14488
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
|
14282
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14489
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14283
14490
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
14284
14491
|
|
14285
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14492
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14286
14493
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
|
14287
14494
|
}
|
14288
14495
|
|
@@ -14454,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14454
14661
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14455
14662
|
|
14456
14663
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14457
|
-
reference = parse_target(parser, reference, false);
|
14664
|
+
reference = parse_target(parser, reference, false, false);
|
14458
14665
|
|
14459
14666
|
pm_rescue_node_reference_set(rescue, reference);
|
14460
14667
|
break;
|
@@ -14484,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14484
14691
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14485
14692
|
|
14486
14693
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14487
|
-
reference = parse_target(parser, reference, false);
|
14694
|
+
reference = parse_target(parser, reference, false, false);
|
14488
14695
|
|
14489
14696
|
pm_rescue_node_reference_set(rescue, reference);
|
14490
14697
|
break;
|
@@ -14689,6 +14896,28 @@ parse_block_parameters(
|
|
14689
14896
|
return block_parameters;
|
14690
14897
|
}
|
14691
14898
|
|
14899
|
+
/**
|
14900
|
+
* Return true if any of the visible scopes to the current context are using
|
14901
|
+
* numbered parameters.
|
14902
|
+
*/
|
14903
|
+
static bool
|
14904
|
+
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
14905
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14906
|
+
if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
|
14907
|
+
}
|
14908
|
+
|
14909
|
+
return false;
|
14910
|
+
}
|
14911
|
+
|
14912
|
+
/**
|
14913
|
+
* These are the names of the various numbered parameters. We have them here so
|
14914
|
+
* that when we insert them into the constant pool we can use a constant string
|
14915
|
+
* and not have to allocate.
|
14916
|
+
*/
|
14917
|
+
static const char * const pm_numbered_parameter_names[] = {
|
14918
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
14919
|
+
};
|
14920
|
+
|
14692
14921
|
/**
|
14693
14922
|
* Return the node that should be used in the parameters field of a block-like
|
14694
14923
|
* (block or lambda) node, depending on the kind of parameters that were
|
@@ -14696,31 +14925,79 @@ parse_block_parameters(
|
|
14696
14925
|
*/
|
14697
14926
|
static pm_node_t *
|
14698
14927
|
parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
|
14699
|
-
|
14928
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
14929
|
+
|
14930
|
+
// If we have ordinary parameters, then we will return them as the set of
|
14931
|
+
// parameters.
|
14932
|
+
if (parameters != NULL) {
|
14933
|
+
// If we also have implicit parameters, then this is an error.
|
14934
|
+
if (implicit_parameters->size > 0) {
|
14935
|
+
pm_node_t *node = implicit_parameters->nodes[0];
|
14936
|
+
|
14937
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14938
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
|
14939
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14940
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
14941
|
+
} else {
|
14942
|
+
assert(false && "unreachable");
|
14943
|
+
}
|
14944
|
+
}
|
14700
14945
|
|
14701
|
-
if (masked == PM_SCOPE_PARAMETERS_NONE) {
|
14702
|
-
assert(parameters == NULL);
|
14703
|
-
return NULL;
|
14704
|
-
} else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
|
14705
|
-
assert(parameters != NULL);
|
14706
14946
|
return parameters;
|
14707
|
-
}
|
14708
|
-
assert(parameters == NULL);
|
14947
|
+
}
|
14709
14948
|
|
14710
|
-
|
14711
|
-
|
14712
|
-
|
14713
|
-
|
14949
|
+
// If we don't have any implicit parameters, then the set of parameters is
|
14950
|
+
// NULL.
|
14951
|
+
if (implicit_parameters->size == 0) {
|
14952
|
+
return NULL;
|
14953
|
+
}
|
14954
|
+
|
14955
|
+
// If we don't have ordinary parameters, then we now must validate our set
|
14956
|
+
// of implicit parameters. We can only have numbered parameters or it, but
|
14957
|
+
// they cannot be mixed.
|
14958
|
+
uint8_t numbered_parameter = 0;
|
14959
|
+
bool it_parameter = false;
|
14960
|
+
|
14961
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
14962
|
+
pm_node_t *node = implicit_parameters->nodes[index];
|
14963
|
+
|
14964
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14965
|
+
if (it_parameter) {
|
14966
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
|
14967
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
14968
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
|
14969
|
+
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
|
14970
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
|
14971
|
+
} else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
14972
|
+
numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
|
14973
|
+
} else {
|
14974
|
+
assert(false && "unreachable");
|
14975
|
+
}
|
14976
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14977
|
+
if (numbered_parameter > 0) {
|
14978
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
14979
|
+
} else {
|
14980
|
+
it_parameter = true;
|
14981
|
+
}
|
14714
14982
|
}
|
14983
|
+
}
|
14715
14984
|
|
14716
|
-
|
14717
|
-
|
14718
|
-
|
14985
|
+
if (numbered_parameter > 0) {
|
14986
|
+
// Go through the parent scopes and mark them as being disallowed from
|
14987
|
+
// using numbered parameters because this inner scope is using them.
|
14988
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14989
|
+
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
|
14990
|
+
}
|
14991
|
+
|
14992
|
+
const pm_location_t location = { .start = opening->start, .end = closing->end };
|
14993
|
+
return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
|
14994
|
+
}
|
14995
|
+
|
14996
|
+
if (it_parameter) {
|
14719
14997
|
return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
|
14720
|
-
} else {
|
14721
|
-
assert(false && "unreachable");
|
14722
|
-
return NULL;
|
14723
14998
|
}
|
14999
|
+
|
15000
|
+
return NULL;
|
14724
15001
|
}
|
14725
15002
|
|
14726
15003
|
/**
|
@@ -14737,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
|
|
14737
15014
|
pm_block_parameters_node_t *block_parameters = NULL;
|
14738
15015
|
|
14739
15016
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
14740
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
14741
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
14742
|
-
|
14743
15017
|
pm_token_t block_parameters_opening = parser->previous;
|
14744
15018
|
if (match1(parser, PM_TOKEN_PIPE)) {
|
14745
15019
|
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
|
@@ -14808,7 +15082,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14808
15082
|
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
|
14809
15083
|
} else {
|
14810
15084
|
pm_accepts_block_stack_push(parser, true);
|
14811
|
-
parse_arguments(parser, arguments,
|
15085
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
|
14812
15086
|
|
14813
15087
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14814
15088
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
|
@@ -14826,7 +15100,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14826
15100
|
// If we get here, then the subsequent token cannot be used as an infix
|
14827
15101
|
// operator. In this case we assume the subsequent token is part of an
|
14828
15102
|
// argument to this method call.
|
14829
|
-
parse_arguments(parser, arguments,
|
15103
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
|
14830
15104
|
|
14831
15105
|
// If we have done with the arguments and still not consumed the comma,
|
14832
15106
|
// then we have a trailing comma where we need to check whether it is
|
@@ -14857,11 +15131,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14857
15131
|
if (arguments->block == NULL && !arguments->has_forwarding) {
|
14858
15132
|
arguments->block = (pm_node_t *) block;
|
14859
15133
|
} else {
|
14860
|
-
|
14861
|
-
|
14862
|
-
} else {
|
14863
|
-
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14864
|
-
}
|
15134
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
15135
|
+
|
14865
15136
|
if (arguments->block != NULL) {
|
14866
15137
|
if (arguments->arguments == NULL) {
|
14867
15138
|
arguments->arguments = pm_arguments_node_create(parser);
|
@@ -15240,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
|
|
15240
15511
|
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
|
15241
15512
|
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
|
15242
15513
|
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
|
15243
|
-
case PM_NUMBERED_REFERENCE_READ_NODE
|
15514
|
+
case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
|
15244
15515
|
|
15245
15516
|
// Assert here that the flags are the same so that we can safely switch the type
|
15246
15517
|
// of the node without having to move the flags.
|
@@ -15298,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15298
15569
|
// "aaa #{bbb} #@ccc ddd"
|
15299
15570
|
// ^^^^^^
|
15300
15571
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
15572
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15573
|
+
// even though it is known at parse time.
|
15574
|
+
parser->explicit_encoding = NULL;
|
15575
|
+
|
15301
15576
|
pm_lex_state_t state = parser->lex_state;
|
15302
15577
|
int brace_nesting = parser->brace_nesting;
|
15303
15578
|
|
@@ -15320,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
|
|
15320
15595
|
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
|
15321
15596
|
pm_token_t closing = parser->previous;
|
15322
15597
|
|
15598
|
+
// If this set of embedded statements only contains a single
|
15599
|
+
// statement, then Ruby does not consider it as a possible statement
|
15600
|
+
// that could emit a line event.
|
15601
|
+
if (statements != NULL && statements->body.size == 1) {
|
15602
|
+
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
|
15603
|
+
}
|
15604
|
+
|
15323
15605
|
return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
|
15324
15606
|
}
|
15325
15607
|
|
@@ -15330,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15330
15612
|
// "aaa #{bbb} #@ccc ddd"
|
15331
15613
|
// ^^^^^
|
15332
15614
|
case PM_TOKEN_EMBVAR: {
|
15615
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15616
|
+
// even though it is known at parse time.
|
15617
|
+
parser->explicit_encoding = NULL;
|
15618
|
+
|
15333
15619
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
15334
15620
|
parser_lex(parser);
|
15335
15621
|
|
@@ -15644,75 +15930,44 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15644
15930
|
}
|
15645
15931
|
}
|
15646
15932
|
|
15647
|
-
/**
|
15648
|
-
* Return true if any of the visible scopes to the current context are using
|
15649
|
-
* numbered parameters.
|
15650
|
-
*/
|
15651
|
-
static bool
|
15652
|
-
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
15653
|
-
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
15654
|
-
if (scope->numbered_parameters > 0) return true;
|
15655
|
-
}
|
15656
|
-
|
15657
|
-
return false;
|
15658
|
-
}
|
15659
|
-
|
15660
|
-
/**
|
15661
|
-
* These are the names of the various numbered parameters. We have them here so
|
15662
|
-
* that when we insert them into the constant pool we can use a constant string
|
15663
|
-
* and not have to allocate.
|
15664
|
-
*/
|
15665
|
-
static const char * const pm_numbered_parameter_names[] = {
|
15666
|
-
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
15667
|
-
};
|
15668
|
-
|
15669
15933
|
/**
|
15670
15934
|
* Parse an identifier into either a local variable read. If the local variable
|
15671
15935
|
* is not found, it returns NULL instead.
|
15672
15936
|
*/
|
15673
|
-
static
|
15937
|
+
static pm_node_t *
|
15674
15938
|
parse_variable(pm_parser_t *parser) {
|
15939
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
15675
15940
|
int depth;
|
15676
|
-
|
15677
|
-
|
15941
|
+
|
15942
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
15943
|
+
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
15678
15944
|
}
|
15679
15945
|
|
15680
15946
|
pm_scope_t *current_scope = parser->current_scope;
|
15681
|
-
if (!current_scope->closed && current_scope->
|
15682
|
-
|
15683
|
-
|
15684
|
-
|
15685
|
-
|
15686
|
-
|
15687
|
-
|
15688
|
-
|
15689
|
-
|
15690
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
15691
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
15692
|
-
} else {
|
15693
|
-
// Indicate that this scope is using numbered params so that child
|
15694
|
-
// scopes cannot. We subtract the value for the character '0' to get
|
15695
|
-
// the actual integer value of the number (only _1 through _9 are
|
15696
|
-
// valid).
|
15697
|
-
int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
|
15698
|
-
current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
|
15699
|
-
|
15700
|
-
if (numbered_parameters > current_scope->numbered_parameters) {
|
15701
|
-
current_scope->numbered_parameters = numbered_parameters;
|
15947
|
+
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
15948
|
+
if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
15949
|
+
// When you use a numbered parameter, it implies the existence of
|
15950
|
+
// all of the locals that exist before it. For example, referencing
|
15951
|
+
// _2 means that _1 must exist. Therefore here we loop through all
|
15952
|
+
// of the possibilities and add them into the constant pool.
|
15953
|
+
uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
|
15954
|
+
for (uint8_t number = 1; number <= maximum; number++) {
|
15955
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
|
15702
15956
|
}
|
15703
15957
|
|
15704
|
-
|
15705
|
-
|
15706
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
15707
|
-
// loop through all of the possibilities and add them into the
|
15708
|
-
// constant pool.
|
15709
|
-
for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
|
15710
|
-
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
|
15958
|
+
if (!match1(parser, PM_TOKEN_EQUAL)) {
|
15959
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
|
15711
15960
|
}
|
15712
15961
|
|
15713
|
-
|
15714
|
-
|
15715
|
-
|
15962
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
|
15963
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15964
|
+
|
15965
|
+
return node;
|
15966
|
+
} else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
15967
|
+
pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
|
15968
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15969
|
+
|
15970
|
+
return node;
|
15716
15971
|
}
|
15717
15972
|
}
|
15718
15973
|
|
@@ -15727,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
15727
15982
|
pm_node_flags_t flags = 0;
|
15728
15983
|
|
15729
15984
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
15730
|
-
|
15731
|
-
if (node != NULL) return
|
15985
|
+
pm_node_t *node = parse_variable(parser);
|
15986
|
+
if (node != NULL) return node;
|
15732
15987
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
15733
15988
|
}
|
15734
15989
|
|
@@ -15846,127 +16101,355 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15846
16101
|
nodes->size = write_index;
|
15847
16102
|
}
|
15848
16103
|
|
15849
|
-
static pm_node_t *
|
15850
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
|
15851
|
-
|
15852
16104
|
/**
|
15853
|
-
*
|
15854
|
-
* expression. If it is duplicated from a previous local, then we'll need to add
|
15855
|
-
* an error to the parser.
|
16105
|
+
* Return a string content token at a particular location that is empty.
|
15856
16106
|
*/
|
15857
|
-
static
|
15858
|
-
|
15859
|
-
|
15860
|
-
if (*location->start == '_') return;
|
15861
|
-
|
15862
|
-
if (pm_constant_id_list_includes(captures, capture)) {
|
15863
|
-
pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
|
15864
|
-
} else {
|
15865
|
-
pm_constant_id_list_append(captures, capture);
|
15866
|
-
}
|
16107
|
+
static pm_token_t
|
16108
|
+
parse_strings_empty_content(const uint8_t *location) {
|
16109
|
+
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
15867
16110
|
}
|
15868
16111
|
|
15869
16112
|
/**
|
15870
|
-
*
|
16113
|
+
* Parse a set of strings that could be concatenated together.
|
15871
16114
|
*/
|
15872
|
-
static pm_node_t *
|
15873
|
-
|
15874
|
-
|
15875
|
-
// path nodes.
|
15876
|
-
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15877
|
-
pm_token_t delimiter = parser->previous;
|
15878
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15879
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15880
|
-
}
|
15881
|
-
|
15882
|
-
// If there is a [ or ( that follows, then this is part of a larger pattern
|
15883
|
-
// expression. We'll parse the inner pattern here, then modify the returned
|
15884
|
-
// inner pattern with our constant path attached.
|
15885
|
-
if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
|
15886
|
-
return node;
|
15887
|
-
}
|
16115
|
+
static inline pm_node_t *
|
16116
|
+
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16117
|
+
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
15888
16118
|
|
15889
|
-
|
15890
|
-
|
15891
|
-
pm_node_t *inner = NULL;
|
16119
|
+
bool concating = false;
|
16120
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
15892
16121
|
|
15893
|
-
|
15894
|
-
|
15895
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
16122
|
+
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16123
|
+
pm_node_t *node = NULL;
|
15896
16124
|
|
15897
|
-
|
15898
|
-
|
15899
|
-
|
15900
|
-
|
15901
|
-
|
16125
|
+
// Here we have found a string literal. We'll parse it and add it to
|
16126
|
+
// the list of strings.
|
16127
|
+
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16128
|
+
assert(lex_mode->mode == PM_LEX_STRING);
|
16129
|
+
bool lex_interpolation = lex_mode->as.string.interpolation;
|
15902
16130
|
|
15903
|
-
|
15904
|
-
} else {
|
16131
|
+
pm_token_t opening = parser->current;
|
15905
16132
|
parser_lex(parser);
|
15906
|
-
opening = parser->previous;
|
15907
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
15908
|
-
|
15909
|
-
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
15910
|
-
inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
15911
|
-
accept1(parser, PM_TOKEN_NEWLINE);
|
15912
|
-
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
15913
|
-
}
|
15914
|
-
|
15915
|
-
closing = parser->previous;
|
15916
|
-
}
|
15917
|
-
|
15918
|
-
if (!inner) {
|
15919
|
-
// If there was no inner pattern, then we have something like Foo() or
|
15920
|
-
// Foo[]. In that case we'll create an array pattern with no requireds.
|
15921
|
-
return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
15922
|
-
}
|
15923
16133
|
|
15924
|
-
|
15925
|
-
|
15926
|
-
|
15927
|
-
|
15928
|
-
|
15929
|
-
|
15930
|
-
|
16134
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16135
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16136
|
+
// If we get here, then we have an end immediately after a
|
16137
|
+
// start. In that case we'll create an empty content token and
|
16138
|
+
// return an uninterpolated string.
|
16139
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16140
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
15931
16141
|
|
15932
|
-
|
15933
|
-
|
15934
|
-
|
16142
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16143
|
+
node = (pm_node_t *) string;
|
16144
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16145
|
+
// If we get here, then we have an end of a label immediately
|
16146
|
+
// after a start. In that case we'll create an empty symbol
|
16147
|
+
// node.
|
16148
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16149
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
15935
16150
|
|
15936
|
-
|
15937
|
-
|
15938
|
-
|
16151
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16152
|
+
node = (pm_node_t *) symbol;
|
16153
|
+
} else if (!lex_interpolation) {
|
16154
|
+
// If we don't accept interpolation then we expect the string to
|
16155
|
+
// start with a single string content node.
|
16156
|
+
pm_string_t unescaped;
|
16157
|
+
pm_token_t content;
|
15939
16158
|
|
15940
|
-
|
16159
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
16160
|
+
unescaped = PM_STRING_EMPTY;
|
16161
|
+
content = not_provided(parser);
|
16162
|
+
} else {
|
16163
|
+
unescaped = parser->current_string;
|
16164
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16165
|
+
content = parser->previous;
|
15941
16166
|
}
|
15942
16167
|
|
15943
|
-
|
15944
|
-
|
15945
|
-
|
15946
|
-
|
15947
|
-
|
15948
|
-
|
15949
|
-
|
15950
|
-
|
15951
|
-
|
15952
|
-
|
15953
|
-
|
15954
|
-
|
16168
|
+
// It is unfortunately possible to have multiple string content
|
16169
|
+
// nodes in a row in the case that there's heredoc content in
|
16170
|
+
// the middle of the string, like this cursed example:
|
16171
|
+
//
|
16172
|
+
// <<-END+'b
|
16173
|
+
// a
|
16174
|
+
// END
|
16175
|
+
// c'+'d'
|
16176
|
+
//
|
16177
|
+
// In that case we need to switch to an interpolated string to
|
16178
|
+
// be able to contain all of the parts.
|
16179
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16180
|
+
pm_node_list_t parts = { 0 };
|
15955
16181
|
|
15956
|
-
|
15957
|
-
|
16182
|
+
pm_token_t delimiters = not_provided(parser);
|
16183
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16184
|
+
pm_node_list_append(&parts, part);
|
15958
16185
|
|
15959
|
-
|
15960
|
-
|
15961
|
-
|
15962
|
-
|
16186
|
+
do {
|
16187
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16188
|
+
pm_node_list_append(&parts, part);
|
16189
|
+
parser_lex(parser);
|
16190
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
15963
16191
|
|
15964
|
-
|
15965
|
-
|
15966
|
-
pattern_node->base.location.end = closing.end;
|
16192
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16193
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
15967
16194
|
|
15968
|
-
|
15969
|
-
|
16195
|
+
pm_node_list_free(&parts);
|
16196
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16197
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16198
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16199
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16200
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16201
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16202
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16203
|
+
} else {
|
16204
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16205
|
+
parser->previous.start = parser->previous.end;
|
16206
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16207
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16208
|
+
}
|
16209
|
+
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16210
|
+
// In this case we've hit string content so we know the string
|
16211
|
+
// at least has something in it. We'll need to check if the
|
16212
|
+
// following token is the end (in which case we can return a
|
16213
|
+
// plain string) or if it's not then it has interpolation.
|
16214
|
+
pm_token_t content = parser->current;
|
16215
|
+
pm_string_t unescaped = parser->current_string;
|
16216
|
+
parser_lex(parser);
|
16217
|
+
|
16218
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16219
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16220
|
+
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16221
|
+
|
16222
|
+
// Kind of odd behavior, but basically if we have an
|
16223
|
+
// unterminated string and it ends in a newline, we back up one
|
16224
|
+
// character so that the error message is on the last line of
|
16225
|
+
// content in the string.
|
16226
|
+
if (!accept1(parser, PM_TOKEN_STRING_END)) {
|
16227
|
+
const uint8_t *location = parser->previous.end;
|
16228
|
+
if (location > parser->start && location[-1] == '\n') location--;
|
16229
|
+
pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
|
16230
|
+
|
16231
|
+
parser->previous.start = parser->previous.end;
|
16232
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16233
|
+
}
|
16234
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16235
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16236
|
+
} else {
|
16237
|
+
// If we get here, then we have interpolation so we'll need
|
16238
|
+
// to create a string or symbol node with interpolation.
|
16239
|
+
pm_node_list_t parts = { 0 };
|
16240
|
+
pm_token_t string_opening = not_provided(parser);
|
16241
|
+
pm_token_t string_closing = not_provided(parser);
|
16242
|
+
|
16243
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16244
|
+
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16245
|
+
pm_node_list_append(&parts, part);
|
16246
|
+
|
16247
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16248
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16249
|
+
pm_node_list_append(&parts, part);
|
16250
|
+
}
|
16251
|
+
}
|
16252
|
+
|
16253
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16254
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16255
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16256
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16257
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16258
|
+
} else {
|
16259
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16260
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16261
|
+
}
|
16262
|
+
|
16263
|
+
pm_node_list_free(&parts);
|
16264
|
+
}
|
16265
|
+
} else {
|
16266
|
+
// If we get here, then the first part of the string is not plain
|
16267
|
+
// string content, in which case we need to parse the string as an
|
16268
|
+
// interpolated string.
|
16269
|
+
pm_node_list_t parts = { 0 };
|
16270
|
+
pm_node_t *part;
|
16271
|
+
|
16272
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16273
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16274
|
+
pm_node_list_append(&parts, part);
|
16275
|
+
}
|
16276
|
+
}
|
16277
|
+
|
16278
|
+
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16279
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16280
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16281
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16282
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16283
|
+
} else {
|
16284
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16285
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16286
|
+
}
|
16287
|
+
|
16288
|
+
pm_node_list_free(&parts);
|
16289
|
+
}
|
16290
|
+
|
16291
|
+
if (current == NULL) {
|
16292
|
+
// If the node we just parsed is a symbol node, then we can't
|
16293
|
+
// concatenate it with anything else, so we can now return that
|
16294
|
+
// node.
|
16295
|
+
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16296
|
+
return node;
|
16297
|
+
}
|
16298
|
+
|
16299
|
+
// If we don't already have a node, then it's fine and we can just
|
16300
|
+
// set the result to be the node we just parsed.
|
16301
|
+
current = node;
|
16302
|
+
} else {
|
16303
|
+
// Otherwise we need to check the type of the node we just parsed.
|
16304
|
+
// If it cannot be concatenated with the previous node, then we'll
|
16305
|
+
// need to add a syntax error.
|
16306
|
+
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16307
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16308
|
+
}
|
16309
|
+
|
16310
|
+
// If we haven't already created our container for concatenation,
|
16311
|
+
// we'll do that now.
|
16312
|
+
if (!concating) {
|
16313
|
+
concating = true;
|
16314
|
+
pm_token_t bounds = not_provided(parser);
|
16315
|
+
|
16316
|
+
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16317
|
+
pm_interpolated_string_node_append(container, current);
|
16318
|
+
current = (pm_node_t *) container;
|
16319
|
+
}
|
16320
|
+
|
16321
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16322
|
+
}
|
16323
|
+
}
|
16324
|
+
|
16325
|
+
return current;
|
16326
|
+
}
|
16327
|
+
|
16328
|
+
#define PM_PARSE_PATTERN_SINGLE 0
|
16329
|
+
#define PM_PARSE_PATTERN_TOP 1
|
16330
|
+
#define PM_PARSE_PATTERN_MULTI 2
|
16331
|
+
|
16332
|
+
static pm_node_t *
|
16333
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
|
16334
|
+
|
16335
|
+
/**
|
16336
|
+
* Add the newly created local to the list of captures for this pattern matching
|
16337
|
+
* expression. If it is duplicated from a previous local, then we'll need to add
|
16338
|
+
* an error to the parser.
|
16339
|
+
*/
|
16340
|
+
static void
|
16341
|
+
parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
|
16342
|
+
// Skip this capture if it starts with an underscore.
|
16343
|
+
if (*location->start == '_') return;
|
16344
|
+
|
16345
|
+
if (pm_constant_id_list_includes(captures, capture)) {
|
16346
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
|
16347
|
+
} else {
|
16348
|
+
pm_constant_id_list_append(captures, capture);
|
16349
|
+
}
|
16350
|
+
}
|
16351
|
+
|
16352
|
+
/**
|
16353
|
+
* Accept any number of constants joined by :: delimiters.
|
16354
|
+
*/
|
16355
|
+
static pm_node_t *
|
16356
|
+
parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
|
16357
|
+
// Now, if there are any :: operators that follow, parse them as constant
|
16358
|
+
// path nodes.
|
16359
|
+
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
16360
|
+
pm_token_t delimiter = parser->previous;
|
16361
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16362
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
16363
|
+
}
|
16364
|
+
|
16365
|
+
// If there is a [ or ( that follows, then this is part of a larger pattern
|
16366
|
+
// expression. We'll parse the inner pattern here, then modify the returned
|
16367
|
+
// inner pattern with our constant path attached.
|
16368
|
+
if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
|
16369
|
+
return node;
|
16370
|
+
}
|
16371
|
+
|
16372
|
+
pm_token_t opening;
|
16373
|
+
pm_token_t closing;
|
16374
|
+
pm_node_t *inner = NULL;
|
16375
|
+
|
16376
|
+
if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
|
16377
|
+
opening = parser->previous;
|
16378
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16379
|
+
|
16380
|
+
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16381
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
16382
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16383
|
+
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
16384
|
+
}
|
16385
|
+
|
16386
|
+
closing = parser->previous;
|
16387
|
+
} else {
|
16388
|
+
parser_lex(parser);
|
16389
|
+
opening = parser->previous;
|
16390
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16391
|
+
|
16392
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16393
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16394
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
16395
|
+
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16396
|
+
}
|
16397
|
+
|
16398
|
+
closing = parser->previous;
|
16399
|
+
}
|
16400
|
+
|
16401
|
+
if (!inner) {
|
16402
|
+
// If there was no inner pattern, then we have something like Foo() or
|
16403
|
+
// Foo[]. In that case we'll create an array pattern with no requireds.
|
16404
|
+
return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
16405
|
+
}
|
16406
|
+
|
16407
|
+
// Now that we have the inner pattern, check to see if it's an array, find,
|
16408
|
+
// or hash pattern. If it is, then we'll attach our constant path to it if
|
16409
|
+
// it doesn't already have a constant. If it's not one of those node types
|
16410
|
+
// or it does have a constant, then we'll create an array pattern.
|
16411
|
+
switch (PM_NODE_TYPE(inner)) {
|
16412
|
+
case PM_ARRAY_PATTERN_NODE: {
|
16413
|
+
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
|
16414
|
+
|
16415
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16416
|
+
pattern_node->base.location.start = node->location.start;
|
16417
|
+
pattern_node->base.location.end = closing.end;
|
16418
|
+
|
16419
|
+
pattern_node->constant = node;
|
16420
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
16421
|
+
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
16422
|
+
|
16423
|
+
return (pm_node_t *) pattern_node;
|
16424
|
+
}
|
16425
|
+
|
16426
|
+
break;
|
16427
|
+
}
|
16428
|
+
case PM_FIND_PATTERN_NODE: {
|
16429
|
+
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
|
16430
|
+
|
16431
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16432
|
+
pattern_node->base.location.start = node->location.start;
|
16433
|
+
pattern_node->base.location.end = closing.end;
|
16434
|
+
|
16435
|
+
pattern_node->constant = node;
|
16436
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
16437
|
+
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
16438
|
+
|
16439
|
+
return (pm_node_t *) pattern_node;
|
16440
|
+
}
|
16441
|
+
|
16442
|
+
break;
|
16443
|
+
}
|
16444
|
+
case PM_HASH_PATTERN_NODE: {
|
16445
|
+
pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
|
16446
|
+
|
16447
|
+
if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
|
16448
|
+
pattern_node->base.location.start = node->location.start;
|
16449
|
+
pattern_node->base.location.end = closing.end;
|
16450
|
+
|
16451
|
+
pattern_node->constant = node;
|
16452
|
+
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
|
15970
16453
|
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
|
15971
16454
|
|
15972
16455
|
return (pm_node_t *) pattern_node;
|
@@ -16055,6 +16538,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
16055
16538
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
16056
16539
|
}
|
16057
16540
|
|
16541
|
+
/**
|
16542
|
+
* Check that the slice of the source given by the bounds parameters constitutes
|
16543
|
+
* a valid local variable name.
|
16544
|
+
*/
|
16545
|
+
static bool
|
16546
|
+
pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
16547
|
+
ptrdiff_t length = end - start;
|
16548
|
+
if (length == 0) return false;
|
16549
|
+
|
16550
|
+
// First ensure that it starts with a valid identifier starting character.
|
16551
|
+
size_t width = char_is_identifier_start(parser, start);
|
16552
|
+
if (width == 0) return false;
|
16553
|
+
|
16554
|
+
// Next, ensure that it's not an uppercase character.
|
16555
|
+
if (parser->encoding_changed) {
|
16556
|
+
if (parser->encoding->isupper_char(start, length)) return false;
|
16557
|
+
} else {
|
16558
|
+
if (pm_encoding_utf_8_isupper_char(start, length)) return false;
|
16559
|
+
}
|
16560
|
+
|
16561
|
+
// Next, iterate through all of the bytes of the string to ensure that they
|
16562
|
+
// are all valid identifier characters.
|
16563
|
+
const uint8_t *cursor = start + width;
|
16564
|
+
while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
|
16565
|
+
return cursor == end;
|
16566
|
+
}
|
16567
|
+
|
16058
16568
|
/**
|
16059
16569
|
* Create an implicit node for the value of a hash pattern that has omitted the
|
16060
16570
|
* value. This will use an implicit local variable target.
|
@@ -16062,14 +16572,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
16062
16572
|
static pm_node_t *
|
16063
16573
|
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
|
16064
16574
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
16065
|
-
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
16066
16575
|
|
16576
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
16067
16577
|
int depth = -1;
|
16068
|
-
|
16069
|
-
|
16070
|
-
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16071
|
-
} else {
|
16578
|
+
|
16579
|
+
if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
|
16072
16580
|
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16581
|
+
} else {
|
16582
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16583
|
+
|
16584
|
+
if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
|
16585
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16586
|
+
}
|
16073
16587
|
}
|
16074
16588
|
|
16075
16589
|
if (depth == -1) {
|
@@ -16093,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
16093
16607
|
*/
|
16094
16608
|
static void
|
16095
16609
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
16096
|
-
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
16610
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
|
16097
16611
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
16098
16612
|
}
|
16099
16613
|
}
|
@@ -16124,7 +16638,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16124
16638
|
} else {
|
16125
16639
|
// Here we have a value for the first assoc in the list, so
|
16126
16640
|
// we will parse it now.
|
16127
|
-
value = parse_pattern(parser, captures,
|
16641
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
16128
16642
|
}
|
16129
16643
|
|
16130
16644
|
pm_token_t operator = not_provided(parser);
|
@@ -16139,7 +16653,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16139
16653
|
// If we get anything else, then this is an error. For this we'll
|
16140
16654
|
// create a missing node for the value and create an assoc node for
|
16141
16655
|
// the first node in the list.
|
16142
|
-
|
16656
|
+
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
|
16657
|
+
pm_parser_err_node(parser, first_node, diag_id);
|
16143
16658
|
|
16144
16659
|
pm_token_t operator = not_provided(parser);
|
16145
16660
|
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
@@ -16167,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16167
16682
|
pm_node_list_append(&assocs, assoc);
|
16168
16683
|
}
|
16169
16684
|
} else {
|
16170
|
-
|
16171
|
-
|
16685
|
+
pm_node_t *key;
|
16686
|
+
|
16687
|
+
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16688
|
+
key = parse_strings(parser, NULL);
|
16689
|
+
|
16690
|
+
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16691
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
|
16692
|
+
} else if (!pm_symbol_node_label_p(key)) {
|
16693
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16694
|
+
}
|
16695
|
+
} else {
|
16696
|
+
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16697
|
+
key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16698
|
+
}
|
16172
16699
|
|
16173
16700
|
parse_pattern_hash_key(parser, &keys, key);
|
16174
16701
|
pm_node_t *value = NULL;
|
@@ -16176,7 +16703,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16176
16703
|
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
16177
16704
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
|
16178
16705
|
} else {
|
16179
|
-
value = parse_pattern(parser, captures,
|
16706
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
16180
16707
|
}
|
16181
16708
|
|
16182
16709
|
pm_token_t operator = not_provided(parser);
|
@@ -16233,7 +16760,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16233
16760
|
|
16234
16761
|
// Otherwise, we'll parse the inner pattern, then deal with it depending
|
16235
16762
|
// on the type it returns.
|
16236
|
-
pm_node_t *inner = parse_pattern(parser, captures,
|
16763
|
+
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
16237
16764
|
|
16238
16765
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16239
16766
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
@@ -16300,11 +16827,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16300
16827
|
first_node = parse_pattern_keyword_rest(parser, captures);
|
16301
16828
|
break;
|
16302
16829
|
case PM_TOKEN_STRING_BEGIN:
|
16303
|
-
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false,
|
16830
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
16304
16831
|
break;
|
16305
16832
|
default: {
|
16833
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
|
16306
16834
|
parser_lex(parser);
|
16307
|
-
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
16308
16835
|
|
16309
16836
|
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16310
16837
|
break;
|
@@ -16380,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16380
16907
|
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
16381
16908
|
|
16382
16909
|
if (variable == NULL) {
|
16383
|
-
|
16384
|
-
|
16385
|
-
!parser->current_scope->closed &&
|
16386
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
16387
|
-
pm_token_is_it(parser->previous.start, parser->previous.end)
|
16388
|
-
) {
|
16389
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
16390
|
-
if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
16391
|
-
variable = (pm_node_t *) read;
|
16392
|
-
} else {
|
16393
|
-
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16394
|
-
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16395
|
-
}
|
16910
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16911
|
+
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16396
16912
|
}
|
16397
16913
|
|
16398
16914
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
@@ -16506,7 +17022,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16506
17022
|
pm_token_t opening = parser->current;
|
16507
17023
|
parser_lex(parser);
|
16508
17024
|
|
16509
|
-
pm_node_t *body = parse_pattern(parser, captures,
|
17025
|
+
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16510
17026
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16511
17027
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16512
17028
|
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
@@ -16565,7 +17081,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16565
17081
|
* Parse a pattern matching expression.
|
16566
17082
|
*/
|
16567
17083
|
static pm_node_t *
|
16568
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
17084
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
|
16569
17085
|
pm_node_t *node = NULL;
|
16570
17086
|
|
16571
17087
|
bool leading_rest = false;
|
@@ -16575,14 +17091,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16575
17091
|
case PM_TOKEN_LABEL: {
|
16576
17092
|
parser_lex(parser);
|
16577
17093
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16578
|
-
|
17094
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
|
17095
|
+
|
17096
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
17097
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
17098
|
+
}
|
17099
|
+
|
17100
|
+
return node;
|
16579
17101
|
}
|
16580
17102
|
case PM_TOKEN_USTAR_STAR: {
|
16581
17103
|
node = parse_pattern_keyword_rest(parser, captures);
|
16582
|
-
|
17104
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
17105
|
+
|
17106
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
17107
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
17108
|
+
}
|
17109
|
+
|
17110
|
+
return node;
|
16583
17111
|
}
|
16584
17112
|
case PM_TOKEN_USTAR: {
|
16585
|
-
if (
|
17113
|
+
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
|
16586
17114
|
parser_lex(parser);
|
16587
17115
|
node = (pm_node_t *) parse_pattern_rest(parser, captures);
|
16588
17116
|
leading_rest = true;
|
@@ -16601,7 +17129,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16601
17129
|
return (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16602
17130
|
}
|
16603
17131
|
|
16604
|
-
if (
|
17132
|
+
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
|
16605
17133
|
// If we have a comma, then we are now parsing either an array pattern or a
|
16606
17134
|
// find pattern. We need to parse all of the patterns, put them into a big
|
16607
17135
|
// list, and then determine which type of node we have.
|
@@ -16642,262 +17170,53 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16642
17170
|
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
16643
17171
|
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
16644
17172
|
} else {
|
16645
|
-
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
16646
|
-
}
|
16647
|
-
|
16648
|
-
xfree(nodes.nodes);
|
16649
|
-
} else if (leading_rest) {
|
16650
|
-
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
16651
|
-
// array pattern, so we can go ahead and create that node.
|
16652
|
-
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
16653
|
-
}
|
16654
|
-
|
16655
|
-
return node;
|
16656
|
-
}
|
16657
|
-
|
16658
|
-
/**
|
16659
|
-
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
16660
|
-
* from its start bounds. If it's a compound node, then we will recursively
|
16661
|
-
* apply this function to its value.
|
16662
|
-
*/
|
16663
|
-
static inline void
|
16664
|
-
parse_negative_numeric(pm_node_t *node) {
|
16665
|
-
switch (PM_NODE_TYPE(node)) {
|
16666
|
-
case PM_INTEGER_NODE: {
|
16667
|
-
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
16668
|
-
cast->base.location.start--;
|
16669
|
-
cast->value.negative = true;
|
16670
|
-
break;
|
16671
|
-
}
|
16672
|
-
case PM_FLOAT_NODE: {
|
16673
|
-
pm_float_node_t *cast = (pm_float_node_t *) node;
|
16674
|
-
cast->base.location.start--;
|
16675
|
-
cast->value = -cast->value;
|
16676
|
-
break;
|
16677
|
-
}
|
16678
|
-
case PM_RATIONAL_NODE:
|
16679
|
-
node->location.start--;
|
16680
|
-
parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
|
16681
|
-
break;
|
16682
|
-
case PM_IMAGINARY_NODE:
|
16683
|
-
node->location.start--;
|
16684
|
-
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
16685
|
-
break;
|
16686
|
-
default:
|
16687
|
-
assert(false && "unreachable");
|
16688
|
-
break;
|
16689
|
-
}
|
16690
|
-
}
|
16691
|
-
|
16692
|
-
/**
|
16693
|
-
* Return a string content token at a particular location that is empty.
|
16694
|
-
*/
|
16695
|
-
static pm_token_t
|
16696
|
-
parse_strings_empty_content(const uint8_t *location) {
|
16697
|
-
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16698
|
-
}
|
16699
|
-
|
16700
|
-
/**
|
16701
|
-
* Parse a set of strings that could be concatenated together.
|
16702
|
-
*/
|
16703
|
-
static inline pm_node_t *
|
16704
|
-
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16705
|
-
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16706
|
-
|
16707
|
-
bool concating = false;
|
16708
|
-
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16709
|
-
|
16710
|
-
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16711
|
-
pm_node_t *node = NULL;
|
16712
|
-
|
16713
|
-
// Here we have found a string literal. We'll parse it and add it to
|
16714
|
-
// the list of strings.
|
16715
|
-
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16716
|
-
assert(lex_mode->mode == PM_LEX_STRING);
|
16717
|
-
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16718
|
-
|
16719
|
-
pm_token_t opening = parser->current;
|
16720
|
-
parser_lex(parser);
|
16721
|
-
|
16722
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16723
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16724
|
-
// If we get here, then we have an end immediately after a
|
16725
|
-
// start. In that case we'll create an empty content token and
|
16726
|
-
// return an uninterpolated string.
|
16727
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16728
|
-
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16729
|
-
|
16730
|
-
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16731
|
-
node = (pm_node_t *) string;
|
16732
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16733
|
-
// If we get here, then we have an end of a label immediately
|
16734
|
-
// after a start. In that case we'll create an empty symbol
|
16735
|
-
// node.
|
16736
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16737
|
-
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16738
|
-
|
16739
|
-
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16740
|
-
node = (pm_node_t *) symbol;
|
16741
|
-
} else if (!lex_interpolation) {
|
16742
|
-
// If we don't accept interpolation then we expect the string to
|
16743
|
-
// start with a single string content node.
|
16744
|
-
pm_string_t unescaped;
|
16745
|
-
pm_token_t content;
|
16746
|
-
if (match1(parser, PM_TOKEN_EOF)) {
|
16747
|
-
unescaped = PM_STRING_EMPTY;
|
16748
|
-
content = not_provided(parser);
|
16749
|
-
} else {
|
16750
|
-
unescaped = parser->current_string;
|
16751
|
-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16752
|
-
content = parser->previous;
|
16753
|
-
}
|
16754
|
-
|
16755
|
-
// It is unfortunately possible to have multiple string content
|
16756
|
-
// nodes in a row in the case that there's heredoc content in
|
16757
|
-
// the middle of the string, like this cursed example:
|
16758
|
-
//
|
16759
|
-
// <<-END+'b
|
16760
|
-
// a
|
16761
|
-
// END
|
16762
|
-
// c'+'d'
|
16763
|
-
//
|
16764
|
-
// In that case we need to switch to an interpolated string to
|
16765
|
-
// be able to contain all of the parts.
|
16766
|
-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16767
|
-
pm_node_list_t parts = { 0 };
|
16768
|
-
|
16769
|
-
pm_token_t delimiters = not_provided(parser);
|
16770
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16771
|
-
pm_node_list_append(&parts, part);
|
16772
|
-
|
16773
|
-
do {
|
16774
|
-
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16775
|
-
pm_node_list_append(&parts, part);
|
16776
|
-
parser_lex(parser);
|
16777
|
-
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16778
|
-
|
16779
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16780
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16781
|
-
|
16782
|
-
pm_node_list_free(&parts);
|
16783
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16784
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16785
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16786
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16787
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16788
|
-
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16789
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16790
|
-
} else {
|
16791
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16792
|
-
parser->previous.start = parser->previous.end;
|
16793
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
16794
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16795
|
-
}
|
16796
|
-
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16797
|
-
// In this case we've hit string content so we know the string
|
16798
|
-
// at least has something in it. We'll need to check if the
|
16799
|
-
// following token is the end (in which case we can return a
|
16800
|
-
// plain string) or if it's not then it has interpolation.
|
16801
|
-
pm_token_t content = parser->current;
|
16802
|
-
pm_string_t unescaped = parser->current_string;
|
16803
|
-
parser_lex(parser);
|
16804
|
-
|
16805
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16806
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16807
|
-
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16808
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16809
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16810
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16811
|
-
} else {
|
16812
|
-
// If we get here, then we have interpolation so we'll need
|
16813
|
-
// to create a string or symbol node with interpolation.
|
16814
|
-
pm_node_list_t parts = { 0 };
|
16815
|
-
pm_token_t string_opening = not_provided(parser);
|
16816
|
-
pm_token_t string_closing = not_provided(parser);
|
16817
|
-
|
16818
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16819
|
-
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16820
|
-
pm_node_list_append(&parts, part);
|
16821
|
-
|
16822
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16823
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16824
|
-
pm_node_list_append(&parts, part);
|
16825
|
-
}
|
16826
|
-
}
|
16827
|
-
|
16828
|
-
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16829
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16830
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16831
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16832
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16833
|
-
} else {
|
16834
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16835
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16836
|
-
}
|
16837
|
-
|
16838
|
-
pm_node_list_free(&parts);
|
16839
|
-
}
|
16840
|
-
} else {
|
16841
|
-
// If we get here, then the first part of the string is not plain
|
16842
|
-
// string content, in which case we need to parse the string as an
|
16843
|
-
// interpolated string.
|
16844
|
-
pm_node_list_t parts = { 0 };
|
16845
|
-
pm_node_t *part;
|
16846
|
-
|
16847
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16848
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16849
|
-
pm_node_list_append(&parts, part);
|
16850
|
-
}
|
16851
|
-
}
|
16852
|
-
|
16853
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16854
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16855
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16856
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16857
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16858
|
-
} else {
|
16859
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16860
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16861
|
-
}
|
16862
|
-
|
16863
|
-
pm_node_list_free(&parts);
|
16864
|
-
}
|
16865
|
-
|
16866
|
-
if (current == NULL) {
|
16867
|
-
// If the node we just parsed is a symbol node, then we can't
|
16868
|
-
// concatenate it with anything else, so we can now return that
|
16869
|
-
// node.
|
16870
|
-
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16871
|
-
return node;
|
16872
|
-
}
|
16873
|
-
|
16874
|
-
// If we don't already have a node, then it's fine and we can just
|
16875
|
-
// set the result to be the node we just parsed.
|
16876
|
-
current = node;
|
16877
|
-
} else {
|
16878
|
-
// Otherwise we need to check the type of the node we just parsed.
|
16879
|
-
// If it cannot be concatenated with the previous node, then we'll
|
16880
|
-
// need to add a syntax error.
|
16881
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16882
|
-
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16883
|
-
}
|
17173
|
+
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
17174
|
+
}
|
16884
17175
|
|
16885
|
-
|
16886
|
-
|
16887
|
-
|
16888
|
-
|
16889
|
-
|
17176
|
+
xfree(nodes.nodes);
|
17177
|
+
} else if (leading_rest) {
|
17178
|
+
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
17179
|
+
// array pattern, so we can go ahead and create that node.
|
17180
|
+
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
17181
|
+
}
|
16890
17182
|
|
16891
|
-
|
16892
|
-
|
16893
|
-
current = (pm_node_t *) container;
|
16894
|
-
}
|
17183
|
+
return node;
|
17184
|
+
}
|
16895
17185
|
|
16896
|
-
|
17186
|
+
/**
|
17187
|
+
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
17188
|
+
* from its start bounds. If it's a compound node, then we will recursively
|
17189
|
+
* apply this function to its value.
|
17190
|
+
*/
|
17191
|
+
static inline void
|
17192
|
+
parse_negative_numeric(pm_node_t *node) {
|
17193
|
+
switch (PM_NODE_TYPE(node)) {
|
17194
|
+
case PM_INTEGER_NODE: {
|
17195
|
+
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
17196
|
+
cast->base.location.start--;
|
17197
|
+
cast->value.negative = true;
|
17198
|
+
break;
|
17199
|
+
}
|
17200
|
+
case PM_FLOAT_NODE: {
|
17201
|
+
pm_float_node_t *cast = (pm_float_node_t *) node;
|
17202
|
+
cast->base.location.start--;
|
17203
|
+
cast->value = -cast->value;
|
17204
|
+
break;
|
17205
|
+
}
|
17206
|
+
case PM_RATIONAL_NODE: {
|
17207
|
+
pm_rational_node_t *cast = (pm_rational_node_t *) node;
|
17208
|
+
cast->base.location.start--;
|
17209
|
+
cast->numerator.negative = true;
|
17210
|
+
break;
|
16897
17211
|
}
|
17212
|
+
case PM_IMAGINARY_NODE:
|
17213
|
+
node->location.start--;
|
17214
|
+
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
17215
|
+
break;
|
17216
|
+
default:
|
17217
|
+
assert(false && "unreachable");
|
17218
|
+
break;
|
16898
17219
|
}
|
16899
|
-
|
16900
|
-
return current;
|
16901
17220
|
}
|
16902
17221
|
|
16903
17222
|
/**
|
@@ -16912,6 +17231,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
16912
17231
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
16913
17232
|
break;
|
16914
17233
|
}
|
17234
|
+
case PM_ERR_HASH_VALUE:
|
17235
|
+
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
|
17236
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
|
17237
|
+
break;
|
17238
|
+
}
|
16915
17239
|
case PM_ERR_UNARY_RECEIVER: {
|
16916
17240
|
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
16917
17241
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
@@ -17090,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17090
17414
|
}
|
17091
17415
|
}
|
17092
17416
|
|
17417
|
+
/**
|
17418
|
+
* This struct is used to pass information between the regular expression parser
|
17419
|
+
* and the error callback.
|
17420
|
+
*/
|
17421
|
+
typedef struct {
|
17422
|
+
/** The parser that we are parsing the regular expression for. */
|
17423
|
+
pm_parser_t *parser;
|
17424
|
+
|
17425
|
+
/** The start of the regular expression. */
|
17426
|
+
const uint8_t *start;
|
17427
|
+
|
17428
|
+
/** The end of the regular expression. */
|
17429
|
+
const uint8_t *end;
|
17430
|
+
|
17431
|
+
/**
|
17432
|
+
* Whether or not the source of the regular expression is shared. This
|
17433
|
+
* impacts the location of error messages, because if it is shared then we
|
17434
|
+
* can use the location directly and if it is not, then we use the bounds of
|
17435
|
+
* the regular expression itself.
|
17436
|
+
*/
|
17437
|
+
bool shared;
|
17438
|
+
} parse_regular_expression_error_data_t;
|
17439
|
+
|
17440
|
+
/**
|
17441
|
+
* This callback is called when the regular expression parser encounters a
|
17442
|
+
* syntax error.
|
17443
|
+
*/
|
17444
|
+
static void
|
17445
|
+
parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
|
17446
|
+
parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
|
17447
|
+
pm_location_t location;
|
17448
|
+
|
17449
|
+
if (callback_data->shared) {
|
17450
|
+
location = (pm_location_t) { .start = start, .end = end };
|
17451
|
+
} else {
|
17452
|
+
location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
|
17453
|
+
}
|
17454
|
+
|
17455
|
+
PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
|
17456
|
+
}
|
17457
|
+
|
17458
|
+
/**
|
17459
|
+
* Parse the errors for the regular expression and add them to the parser.
|
17460
|
+
*/
|
17461
|
+
static void
|
17462
|
+
parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
|
17463
|
+
const pm_string_t *unescaped = &node->unescaped;
|
17464
|
+
parse_regular_expression_error_data_t error_data = {
|
17465
|
+
.parser = parser,
|
17466
|
+
.start = node->base.location.start,
|
17467
|
+
.end = node->base.location.end,
|
17468
|
+
.shared = unescaped->type == PM_STRING_SHARED
|
17469
|
+
};
|
17470
|
+
|
17471
|
+
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
|
17472
|
+
}
|
17473
|
+
|
17093
17474
|
/**
|
17094
17475
|
* Parse an expression that begins with the previous node that we just lexed.
|
17095
17476
|
*/
|
@@ -17110,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17110
17491
|
break;
|
17111
17492
|
}
|
17112
17493
|
|
17113
|
-
|
17114
|
-
|
17494
|
+
// Ensure that we have a comma between elements in the array.
|
17495
|
+
if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
|
17496
|
+
const uint8_t *location = parser->previous.end;
|
17497
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17498
|
+
|
17499
|
+
parser->previous.start = location;
|
17500
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
17115
17501
|
}
|
17116
17502
|
|
17117
17503
|
// If we have a right bracket immediately following a comma,
|
@@ -17289,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17289
17675
|
|
17290
17676
|
// If we didn't find a terminator and we didn't find a right
|
17291
17677
|
// parenthesis, then this is a syntax error.
|
17292
|
-
if (!terminator_found) {
|
17678
|
+
if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
|
17293
17679
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17294
17680
|
}
|
17295
17681
|
|
@@ -17318,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17318
17704
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
|
17319
17705
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
17320
17706
|
break;
|
17321
|
-
} else {
|
17707
|
+
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
17708
|
+
// If we're at the end of the file, then we're going to add
|
17709
|
+
// an error after this for the ) anyway.
|
17322
17710
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17323
17711
|
}
|
17324
17712
|
}
|
@@ -17537,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17537
17925
|
) {
|
17538
17926
|
pm_arguments_t arguments = { 0 };
|
17539
17927
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
17540
|
-
|
17541
17928
|
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
|
17929
|
+
|
17930
|
+
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
17931
|
+
// If we're about to convert an 'it' implicit local
|
17932
|
+
// variable read into a method call, we need to remove
|
17933
|
+
// it from the list of implicit local variables.
|
17934
|
+
parse_target_implicit_parameter(parser, node);
|
17935
|
+
} else {
|
17936
|
+
// Otherwise, we're about to convert a regular local
|
17937
|
+
// variable read into a method call, in which case we
|
17938
|
+
// need to indicate that this was not a read for the
|
17939
|
+
// purposes of warnings.
|
17940
|
+
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
|
17941
|
+
|
17942
|
+
if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
|
17943
|
+
parse_target_implicit_parameter(parser, node);
|
17944
|
+
} else {
|
17945
|
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
17946
|
+
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
|
17947
|
+
}
|
17948
|
+
}
|
17949
|
+
|
17542
17950
|
pm_node_destroy(parser, node);
|
17543
17951
|
return (pm_node_t *) fcall;
|
17544
17952
|
}
|
@@ -17546,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17546
17954
|
|
17547
17955
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17548
17956
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
17549
|
-
} else {
|
17550
|
-
// Check if `it` is not going to be assigned.
|
17551
|
-
switch (parser->current.type) {
|
17552
|
-
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
17553
|
-
case PM_TOKEN_AMPERSAND_EQUAL:
|
17554
|
-
case PM_TOKEN_CARET_EQUAL:
|
17555
|
-
case PM_TOKEN_EQUAL:
|
17556
|
-
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
17557
|
-
case PM_TOKEN_LESS_LESS_EQUAL:
|
17558
|
-
case PM_TOKEN_MINUS_EQUAL:
|
17559
|
-
case PM_TOKEN_PARENTHESIS_RIGHT:
|
17560
|
-
case PM_TOKEN_PERCENT_EQUAL:
|
17561
|
-
case PM_TOKEN_PIPE_EQUAL:
|
17562
|
-
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
17563
|
-
case PM_TOKEN_PLUS_EQUAL:
|
17564
|
-
case PM_TOKEN_SLASH_EQUAL:
|
17565
|
-
case PM_TOKEN_STAR_EQUAL:
|
17566
|
-
case PM_TOKEN_STAR_STAR_EQUAL:
|
17567
|
-
break;
|
17568
|
-
default:
|
17569
|
-
// Once we know it's neither a method call nor an
|
17570
|
-
// assignment, we can finally create `it` default
|
17571
|
-
// parameter.
|
17572
|
-
node = pm_node_check_it(parser, node);
|
17573
|
-
}
|
17574
17957
|
}
|
17575
17958
|
|
17576
17959
|
return node;
|
@@ -17831,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17831
18214
|
// as frozen because when clause strings are frozen.
|
17832
18215
|
if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
|
17833
18216
|
pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
|
18217
|
+
} else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
|
18218
|
+
pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
|
17834
18219
|
}
|
17835
18220
|
|
17836
18221
|
pm_when_clause_static_literals_add(parser, &literals, condition);
|
@@ -17887,7 +18272,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17887
18272
|
pm_token_t in_keyword = parser->previous;
|
17888
18273
|
|
17889
18274
|
pm_constant_id_list_t captures = { 0 };
|
17890
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
18275
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
17891
18276
|
|
17892
18277
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
17893
18278
|
pm_constant_id_list_free(&captures);
|
@@ -17916,7 +18301,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17916
18301
|
then_keyword = not_provided(parser);
|
17917
18302
|
}
|
17918
18303
|
} else {
|
17919
|
-
expect1(parser, PM_TOKEN_KEYWORD_THEN,
|
18304
|
+
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
|
17920
18305
|
then_keyword = parser->previous;
|
17921
18306
|
}
|
17922
18307
|
|
@@ -18236,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18236
18621
|
|
18237
18622
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
18238
18623
|
receiver = parse_variable_call(parser);
|
18239
|
-
receiver = pm_node_check_it(parser, receiver);
|
18240
18624
|
|
18241
18625
|
pm_parser_scope_push(parser, true);
|
18242
18626
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
@@ -18370,7 +18754,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18370
18754
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
18371
18755
|
parser->command_start = true;
|
18372
18756
|
|
18373
|
-
|
18757
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18758
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
|
18759
|
+
parser->previous.start = parser->previous.end;
|
18760
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18761
|
+
}
|
18762
|
+
|
18374
18763
|
rparen = parser->previous;
|
18375
18764
|
break;
|
18376
18765
|
}
|
@@ -18568,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18568
18957
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18569
18958
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18570
18959
|
} else {
|
18571
|
-
index = parse_target(parser, index, false);
|
18960
|
+
index = parse_target(parser, index, false, false);
|
18572
18961
|
}
|
18573
18962
|
|
18574
18963
|
context_pop(parser);
|
@@ -19203,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19203
19592
|
bool ascii_only = parser->current_regular_expression_ascii_only;
|
19204
19593
|
parser_lex(parser);
|
19205
19594
|
|
19206
|
-
// If we hit an end, then we can create a regular expression
|
19207
|
-
// without interpolation, which can be represented more
|
19208
|
-
// more easily compiled.
|
19595
|
+
// If we hit an end, then we can create a regular expression
|
19596
|
+
// node without interpolation, which can be represented more
|
19597
|
+
// succinctly and more easily compiled.
|
19209
19598
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
19210
|
-
|
19211
|
-
|
19212
|
-
|
19599
|
+
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
19600
|
+
|
19601
|
+
// If we're not immediately followed by a =~, then we want
|
19602
|
+
// to parse all of the errors at this point. If it is
|
19603
|
+
// followed by a =~, then it will get parsed higher up while
|
19604
|
+
// parsing the named captures as well.
|
19605
|
+
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
|
19606
|
+
parse_regular_expression_errors(parser, node);
|
19607
|
+
}
|
19608
|
+
|
19609
|
+
pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
|
19610
|
+
return (pm_node_t *) node;
|
19213
19611
|
}
|
19214
19612
|
|
19215
19613
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -19219,6 +19617,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19219
19617
|
pm_token_t opening = not_provided(parser);
|
19220
19618
|
pm_token_t closing = not_provided(parser);
|
19221
19619
|
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
19620
|
+
|
19621
|
+
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
19622
|
+
// This is extremely strange, but the first string part of a
|
19623
|
+
// regular expression will always be tagged as binary if we
|
19624
|
+
// are in a US-ASCII file, no matter its contents.
|
19625
|
+
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
|
19626
|
+
}
|
19627
|
+
|
19222
19628
|
pm_interpolated_regular_expression_node_append(interpolated, part);
|
19223
19629
|
} else {
|
19224
19630
|
// If the first part of the body of the regular expression is not a
|
@@ -19419,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19419
19825
|
|
19420
19826
|
switch (parser->current.type) {
|
19421
19827
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
19422
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19423
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19424
|
-
|
19425
19828
|
pm_token_t opening = parser->current;
|
19426
19829
|
parser_lex(parser);
|
19427
19830
|
|
@@ -19438,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19438
19841
|
break;
|
19439
19842
|
}
|
19440
19843
|
case PM_CASE_PARAMETER: {
|
19441
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19442
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19443
|
-
|
19444
19844
|
pm_accepts_block_stack_push(parser, false);
|
19445
19845
|
pm_token_t opening = not_provided(parser);
|
19446
19846
|
block_parameters = parse_block_parameters(parser, false, &opening, true);
|
@@ -19693,122 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19693
20093
|
}
|
19694
20094
|
|
19695
20095
|
/**
|
19696
|
-
*
|
19697
|
-
*
|
20096
|
+
* This struct is used to pass information between the regular expression parser
|
20097
|
+
* and the named capture callback.
|
19698
20098
|
*/
|
19699
|
-
|
19700
|
-
|
19701
|
-
|
19702
|
-
return false;
|
19703
|
-
}
|
20099
|
+
typedef struct {
|
20100
|
+
/** The parser that is parsing the regular expression. */
|
20101
|
+
pm_parser_t *parser;
|
19704
20102
|
|
19705
|
-
|
19706
|
-
|
19707
|
-
if (!width) {
|
19708
|
-
return false;
|
19709
|
-
}
|
20103
|
+
/** The call node wrapping the regular expression node. */
|
20104
|
+
pm_call_node_t *call;
|
19710
20105
|
|
19711
|
-
|
19712
|
-
|
19713
|
-
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
19714
|
-
} else {
|
19715
|
-
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
19716
|
-
}
|
20106
|
+
/** The match write node that is being created. */
|
20107
|
+
pm_match_write_node_t *match;
|
19717
20108
|
|
19718
|
-
|
19719
|
-
|
19720
|
-
const uint8_t *cursor = source + width;
|
19721
|
-
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
19722
|
-
cursor += width;
|
19723
|
-
}
|
20109
|
+
/** The list of names that have been parsed. */
|
20110
|
+
pm_constant_id_list_t names;
|
19724
20111
|
|
19725
|
-
|
19726
|
-
|
20112
|
+
/**
|
20113
|
+
* Whether the content of the regular expression is shared. This impacts
|
20114
|
+
* whether or not we use owned constants or shared constants in the
|
20115
|
+
* constant pool for the names of the captures.
|
20116
|
+
*/
|
20117
|
+
bool shared;
|
20118
|
+
} parse_regular_expression_named_capture_data_t;
|
19727
20119
|
|
19728
20120
|
/**
|
19729
|
-
*
|
19730
|
-
*
|
20121
|
+
* This callback is called when the regular expression parser encounters a named
|
20122
|
+
* capture group.
|
19731
20123
|
*/
|
19732
|
-
static
|
19733
|
-
|
19734
|
-
|
19735
|
-
pm_node_t *result;
|
20124
|
+
static void
|
20125
|
+
parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
20126
|
+
parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
|
19736
20127
|
|
19737
|
-
|
19738
|
-
|
19739
|
-
|
19740
|
-
pm_match_write_node_t *match = NULL;
|
19741
|
-
pm_constant_id_list_t names = { 0 };
|
20128
|
+
pm_parser_t *parser = callback_data->parser;
|
20129
|
+
pm_call_node_t *call = callback_data->call;
|
20130
|
+
pm_constant_id_list_t *names = &callback_data->names;
|
19742
20131
|
|
19743
|
-
|
19744
|
-
|
20132
|
+
const uint8_t *source = pm_string_source(capture);
|
20133
|
+
size_t length = pm_string_length(capture);
|
19745
20134
|
|
19746
|
-
|
19747
|
-
|
20135
|
+
pm_location_t location;
|
20136
|
+
pm_constant_id_t name;
|
19748
20137
|
|
19749
|
-
|
19750
|
-
|
20138
|
+
// If the name of the capture group isn't a valid identifier, we do
|
20139
|
+
// not add it to the local table.
|
20140
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) return;
|
19751
20141
|
|
19752
|
-
|
19753
|
-
|
19754
|
-
|
20142
|
+
if (callback_data->shared) {
|
20143
|
+
// If the unescaped string is a slice of the source, then we can
|
20144
|
+
// copy the names directly. The pointers will line up.
|
20145
|
+
location = (pm_location_t) { .start = source, .end = source + length };
|
20146
|
+
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
20147
|
+
} else {
|
20148
|
+
// Otherwise, the name is a slice of the malloc-ed owned string,
|
20149
|
+
// in which case we need to copy it out into a new string.
|
20150
|
+
location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
|
19755
20151
|
|
19756
|
-
|
19757
|
-
|
19758
|
-
// copy the names directly. The pointers will line up.
|
19759
|
-
location = (pm_location_t) { .start = source, .end = source + length };
|
19760
|
-
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
19761
|
-
} else {
|
19762
|
-
// Otherwise, the name is a slice of the malloc-ed owned string,
|
19763
|
-
// in which case we need to copy it out into a new string.
|
19764
|
-
location = call->receiver->location;
|
20152
|
+
void *memory = xmalloc(length);
|
20153
|
+
if (memory == NULL) abort();
|
19765
20154
|
|
19766
|
-
|
19767
|
-
|
20155
|
+
memcpy(memory, source, length);
|
20156
|
+
name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
|
20157
|
+
}
|
19768
20158
|
|
19769
|
-
|
19770
|
-
|
19771
|
-
|
20159
|
+
// Add this name to the list of constants if it is valid, not duplicated,
|
20160
|
+
// and not a keyword.
|
20161
|
+
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
|
20162
|
+
pm_constant_id_list_append(names, name);
|
19772
20163
|
|
19773
|
-
|
19774
|
-
|
19775
|
-
|
19776
|
-
|
19777
|
-
|
20164
|
+
int depth;
|
20165
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20166
|
+
// If the local is not already a local but it is a keyword, then we
|
20167
|
+
// do not want to add a capture for this.
|
20168
|
+
if (pm_local_is_keyword((const char *) source, length)) return;
|
19778
20169
|
|
19779
|
-
|
19780
|
-
|
19781
|
-
|
19782
|
-
|
19783
|
-
if (pm_local_is_keyword((const char *) source, length)) continue;
|
20170
|
+
// If the identifier is not already a local, then we will add it to
|
20171
|
+
// the local table.
|
20172
|
+
pm_parser_local_add(parser, name, location.start, location.end, 0);
|
20173
|
+
}
|
19784
20174
|
|
19785
|
-
|
19786
|
-
|
20175
|
+
// Here we lazily create the MatchWriteNode since we know we're
|
20176
|
+
// about to add a target.
|
20177
|
+
if (callback_data->match == NULL) {
|
20178
|
+
callback_data->match = pm_match_write_node_create(parser, call);
|
20179
|
+
}
|
19787
20180
|
|
19788
|
-
|
19789
|
-
|
19790
|
-
|
20181
|
+
// Next, create the local variable target and add it to the list of
|
20182
|
+
// targets for the match.
|
20183
|
+
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20184
|
+
pm_node_list_append(&callback_data->match->targets, target);
|
20185
|
+
}
|
20186
|
+
}
|
19791
20187
|
|
19792
|
-
|
19793
|
-
|
19794
|
-
|
19795
|
-
|
19796
|
-
|
19797
|
-
|
20188
|
+
/**
|
20189
|
+
* Potentially change a =~ with a regular expression with named captures into a
|
20190
|
+
* match write node.
|
20191
|
+
*/
|
20192
|
+
static pm_node_t *
|
20193
|
+
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
20194
|
+
parse_regular_expression_named_capture_data_t callback_data = {
|
20195
|
+
.parser = parser,
|
20196
|
+
.call = call,
|
20197
|
+
.names = { 0 },
|
20198
|
+
.shared = content->type == PM_STRING_SHARED
|
20199
|
+
};
|
19798
20200
|
|
19799
|
-
|
19800
|
-
|
19801
|
-
|
19802
|
-
|
19803
|
-
|
20201
|
+
parse_regular_expression_error_data_t error_data = {
|
20202
|
+
.parser = parser,
|
20203
|
+
.start = call->receiver->location.start,
|
20204
|
+
.end = call->receiver->location.end,
|
20205
|
+
.shared = content->type == PM_STRING_SHARED
|
20206
|
+
};
|
20207
|
+
|
20208
|
+
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
20209
|
+
pm_constant_id_list_free(&callback_data.names);
|
19804
20210
|
|
19805
|
-
|
20211
|
+
if (callback_data.match != NULL) {
|
20212
|
+
return (pm_node_t *) callback_data.match;
|
19806
20213
|
} else {
|
19807
|
-
|
20214
|
+
return (pm_node_t *) call;
|
19808
20215
|
}
|
19809
|
-
|
19810
|
-
pm_string_list_free(&named_captures);
|
19811
|
-
return result;
|
19812
20216
|
}
|
19813
20217
|
|
19814
20218
|
static inline pm_node_t *
|
@@ -19925,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19925
20329
|
return result;
|
19926
20330
|
}
|
19927
20331
|
case PM_CALL_NODE: {
|
19928
|
-
parser_lex(parser);
|
19929
20332
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
19930
20333
|
|
19931
20334
|
// If we have a vcall (a method with no arguments and no
|
@@ -19936,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19936
20339
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
19937
20340
|
|
19938
20341
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20342
|
+
parser_lex(parser);
|
20343
|
+
|
19939
20344
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
19940
20345
|
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
19941
20346
|
|
@@ -19943,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19943
20348
|
return result;
|
19944
20349
|
}
|
19945
20350
|
|
20351
|
+
// Move past the token here so that we have already added
|
20352
|
+
// the local variable by this point.
|
20353
|
+
parser_lex(parser);
|
20354
|
+
|
19946
20355
|
// If there is no call operator and the message is "[]" then
|
19947
20356
|
// this is an aref expression, and we can transform it into
|
19948
20357
|
// an aset expression.
|
@@ -20038,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20038
20447
|
return result;
|
20039
20448
|
}
|
20040
20449
|
case PM_CALL_NODE: {
|
20041
|
-
parser_lex(parser);
|
20042
20450
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20043
20451
|
|
20044
20452
|
// If we have a vcall (a method with no arguments and no
|
@@ -20049,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20049
20457
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20050
20458
|
|
20051
20459
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20460
|
+
parser_lex(parser);
|
20461
|
+
|
20052
20462
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
20053
20463
|
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20054
20464
|
|
@@ -20056,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20056
20466
|
return result;
|
20057
20467
|
}
|
20058
20468
|
|
20469
|
+
// Move past the token here so that we have already added
|
20470
|
+
// the local variable by this point.
|
20471
|
+
parser_lex(parser);
|
20472
|
+
|
20059
20473
|
// If there is no call operator and the message is "[]" then
|
20060
20474
|
// this is an aref expression, and we can transform it into
|
20061
20475
|
// an aset expression.
|
@@ -20209,7 +20623,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20209
20623
|
// In this case we have an operator but we don't know what it's for.
|
20210
20624
|
// We need to treat it as an error. For now, we'll mark it as an error
|
20211
20625
|
// and just skip right past it.
|
20212
|
-
|
20626
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
|
20213
20627
|
return node;
|
20214
20628
|
}
|
20215
20629
|
}
|
@@ -20465,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20465
20879
|
|
20466
20880
|
if (
|
20467
20881
|
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
|
20468
|
-
(token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
|
20882
|
+
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
|
20469
20883
|
) {
|
20470
20884
|
// If we have a constant immediately following a '::' operator, then
|
20471
20885
|
// this can either be a constant path or a method call, depending on
|
@@ -20591,7 +21005,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20591
21005
|
parser_lex(parser);
|
20592
21006
|
|
20593
21007
|
pm_constant_id_list_t captures = { 0 };
|
20594
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
21008
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
20595
21009
|
|
20596
21010
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20597
21011
|
pm_constant_id_list_free(&captures);
|
@@ -20608,7 +21022,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20608
21022
|
parser_lex(parser);
|
20609
21023
|
|
20610
21024
|
pm_constant_id_list_t captures = { 0 };
|
20611
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
21025
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
|
20612
21026
|
|
20613
21027
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20614
21028
|
pm_constant_id_list_free(&captures);
|
@@ -20621,6 +21035,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20621
21035
|
}
|
20622
21036
|
}
|
20623
21037
|
|
21038
|
+
#undef PM_PARSE_PATTERN_SINGLE
|
21039
|
+
#undef PM_PARSE_PATTERN_TOP
|
21040
|
+
#undef PM_PARSE_PATTERN_MULTI
|
21041
|
+
|
20624
21042
|
/**
|
20625
21043
|
* Parse an expression at the given point of the parser using the given binding
|
20626
21044
|
* power to parse subsequent chains. If this function finds a syntax error, it
|
@@ -21004,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
21004
21422
|
|
21005
21423
|
// Scopes given from the outside are not allowed to have numbered
|
21006
21424
|
// parameters.
|
21007
|
-
parser->current_scope->
|
21425
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
21008
21426
|
|
21009
21427
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
21010
21428
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -21392,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
21392
21810
|
}
|
21393
21811
|
|
21394
21812
|
#endif
|
21395
|
-
|
21396
|
-
/** An error that is going to be formatted into the output. */
|
21397
|
-
typedef struct {
|
21398
|
-
/** A pointer to the diagnostic that was generated during parsing. */
|
21399
|
-
pm_diagnostic_t *error;
|
21400
|
-
|
21401
|
-
/** The start line of the diagnostic message. */
|
21402
|
-
int32_t line;
|
21403
|
-
|
21404
|
-
/** The column start of the diagnostic message. */
|
21405
|
-
uint32_t column_start;
|
21406
|
-
|
21407
|
-
/** The column end of the diagnostic message. */
|
21408
|
-
uint32_t column_end;
|
21409
|
-
} pm_error_t;
|
21410
|
-
|
21411
|
-
/** The format that will be used to format the errors into the output. */
|
21412
|
-
typedef struct {
|
21413
|
-
/** The prefix that will be used for line numbers. */
|
21414
|
-
const char *number_prefix;
|
21415
|
-
|
21416
|
-
/** The prefix that will be used for blank lines. */
|
21417
|
-
const char *blank_prefix;
|
21418
|
-
|
21419
|
-
/** The divider that will be used between sections of source code. */
|
21420
|
-
const char *divider;
|
21421
|
-
|
21422
|
-
/** The length of the blank prefix. */
|
21423
|
-
size_t blank_prefix_length;
|
21424
|
-
|
21425
|
-
/** The length of the divider. */
|
21426
|
-
size_t divider_length;
|
21427
|
-
} pm_error_format_t;
|
21428
|
-
|
21429
|
-
#define PM_COLOR_GRAY "\033[38;5;102m"
|
21430
|
-
#define PM_COLOR_RED "\033[1;31m"
|
21431
|
-
#define PM_COLOR_RESET "\033[m"
|
21432
|
-
|
21433
|
-
static inline pm_error_t *
|
21434
|
-
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
21435
|
-
pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
|
21436
|
-
if (errors == NULL) return NULL;
|
21437
|
-
|
21438
|
-
int32_t start_line = parser->start_line;
|
21439
|
-
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
21440
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
21441
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
21442
|
-
|
21443
|
-
// We're going to insert this error into the array in sorted order. We
|
21444
|
-
// do this by finding the first error that has a line number greater
|
21445
|
-
// than the current error and then inserting the current error before
|
21446
|
-
// that one.
|
21447
|
-
size_t index = 0;
|
21448
|
-
while (
|
21449
|
-
(index < error_list->size) &&
|
21450
|
-
(errors[index].error != NULL) &&
|
21451
|
-
(
|
21452
|
-
(errors[index].line < start.line) ||
|
21453
|
-
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
21454
|
-
)
|
21455
|
-
) index++;
|
21456
|
-
|
21457
|
-
// Now we're going to shift all of the errors after this one down one
|
21458
|
-
// index to make room for the new error.
|
21459
|
-
if (index + 1 < error_list->size) {
|
21460
|
-
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
21461
|
-
}
|
21462
|
-
|
21463
|
-
// Finally, we'll insert the error into the array.
|
21464
|
-
uint32_t column_end;
|
21465
|
-
if (start.line == end.line) {
|
21466
|
-
column_end = end.column;
|
21467
|
-
} else {
|
21468
|
-
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
21469
|
-
}
|
21470
|
-
|
21471
|
-
// Ensure we have at least one column of error.
|
21472
|
-
if (start.column == column_end) column_end++;
|
21473
|
-
|
21474
|
-
errors[index] = (pm_error_t) {
|
21475
|
-
.error = error,
|
21476
|
-
.line = start.line,
|
21477
|
-
.column_start = start.column,
|
21478
|
-
.column_end = column_end
|
21479
|
-
};
|
21480
|
-
}
|
21481
|
-
|
21482
|
-
return errors;
|
21483
|
-
}
|
21484
|
-
|
21485
|
-
static inline void
|
21486
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
21487
|
-
int32_t line_delta = line - parser->start_line;
|
21488
|
-
assert(line_delta >= 0);
|
21489
|
-
|
21490
|
-
size_t index = (size_t) line_delta;
|
21491
|
-
assert(index < newline_list->size);
|
21492
|
-
|
21493
|
-
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
21494
|
-
const uint8_t *end;
|
21495
|
-
|
21496
|
-
if (index >= newline_list->size - 1) {
|
21497
|
-
end = parser->end;
|
21498
|
-
} else {
|
21499
|
-
end = &parser->start[newline_list->offsets[index + 1]];
|
21500
|
-
}
|
21501
|
-
|
21502
|
-
pm_buffer_append_format(buffer, number_prefix, line);
|
21503
|
-
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
21504
|
-
|
21505
|
-
if (end == parser->end && end[-1] != '\n') {
|
21506
|
-
pm_buffer_append_string(buffer, "\n", 1);
|
21507
|
-
}
|
21508
|
-
}
|
21509
|
-
|
21510
|
-
/**
|
21511
|
-
* Format the errors on the parser into the given buffer.
|
21512
|
-
*/
|
21513
|
-
PRISM_EXPORTED_FUNCTION void
|
21514
|
-
pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
|
21515
|
-
assert(error_list->size != 0);
|
21516
|
-
|
21517
|
-
// First, we're going to sort all of the errors by line number using an
|
21518
|
-
// insertion sort into a newly allocated array.
|
21519
|
-
const int32_t start_line = parser->start_line;
|
21520
|
-
const pm_newline_list_t *newline_list = &parser->newline_list;
|
21521
|
-
|
21522
|
-
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
21523
|
-
if (errors == NULL) return;
|
21524
|
-
|
21525
|
-
// Now we're going to determine how we're going to format line numbers and
|
21526
|
-
// blank lines based on the maximum number of digits in the line numbers
|
21527
|
-
// that are going to be displaid.
|
21528
|
-
pm_error_format_t error_format;
|
21529
|
-
int32_t first_line_number = errors[0].line;
|
21530
|
-
int32_t last_line_number = errors[error_list->size - 1].line;
|
21531
|
-
|
21532
|
-
// If we have a maximum line number that is negative, then we're going to
|
21533
|
-
// use the absolute value for comparison but multiple by 10 to additionally
|
21534
|
-
// have a column for the negative sign.
|
21535
|
-
if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
|
21536
|
-
if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
|
21537
|
-
int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
|
21538
|
-
|
21539
|
-
if (max_line_number < 10) {
|
21540
|
-
if (colorize) {
|
21541
|
-
error_format = (pm_error_format_t) {
|
21542
|
-
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
21543
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21544
|
-
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
21545
|
-
};
|
21546
|
-
} else {
|
21547
|
-
error_format = (pm_error_format_t) {
|
21548
|
-
.number_prefix = "%1" PRIi32 " | ",
|
21549
|
-
.blank_prefix = " | ",
|
21550
|
-
.divider = " ~~~~~\n"
|
21551
|
-
};
|
21552
|
-
}
|
21553
|
-
} else if (max_line_number < 100) {
|
21554
|
-
if (colorize) {
|
21555
|
-
error_format = (pm_error_format_t) {
|
21556
|
-
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
21557
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21558
|
-
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
21559
|
-
};
|
21560
|
-
} else {
|
21561
|
-
error_format = (pm_error_format_t) {
|
21562
|
-
.number_prefix = "%2" PRIi32 " | ",
|
21563
|
-
.blank_prefix = " | ",
|
21564
|
-
.divider = " ~~~~~~\n"
|
21565
|
-
};
|
21566
|
-
}
|
21567
|
-
} else if (max_line_number < 1000) {
|
21568
|
-
if (colorize) {
|
21569
|
-
error_format = (pm_error_format_t) {
|
21570
|
-
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
21571
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21572
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
21573
|
-
};
|
21574
|
-
} else {
|
21575
|
-
error_format = (pm_error_format_t) {
|
21576
|
-
.number_prefix = "%3" PRIi32 " | ",
|
21577
|
-
.blank_prefix = " | ",
|
21578
|
-
.divider = " ~~~~~~~\n"
|
21579
|
-
};
|
21580
|
-
}
|
21581
|
-
} else if (max_line_number < 10000) {
|
21582
|
-
if (colorize) {
|
21583
|
-
error_format = (pm_error_format_t) {
|
21584
|
-
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
21585
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21586
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21587
|
-
};
|
21588
|
-
} else {
|
21589
|
-
error_format = (pm_error_format_t) {
|
21590
|
-
.number_prefix = "%4" PRIi32 " | ",
|
21591
|
-
.blank_prefix = " | ",
|
21592
|
-
.divider = " ~~~~~~~~\n"
|
21593
|
-
};
|
21594
|
-
}
|
21595
|
-
} else {
|
21596
|
-
if (colorize) {
|
21597
|
-
error_format = (pm_error_format_t) {
|
21598
|
-
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
21599
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21600
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21601
|
-
};
|
21602
|
-
} else {
|
21603
|
-
error_format = (pm_error_format_t) {
|
21604
|
-
.number_prefix = "%5" PRIi32 " | ",
|
21605
|
-
.blank_prefix = " | ",
|
21606
|
-
.divider = " ~~~~~~~~\n"
|
21607
|
-
};
|
21608
|
-
}
|
21609
|
-
}
|
21610
|
-
|
21611
|
-
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
21612
|
-
error_format.divider_length = strlen(error_format.divider);
|
21613
|
-
|
21614
|
-
// Now we're going to iterate through every error in our error list and
|
21615
|
-
// display it. While we're iterating, we will display some padding lines of
|
21616
|
-
// the source before the error to give some context. We'll be careful not to
|
21617
|
-
// display the same line twice in case the errors are close enough in the
|
21618
|
-
// source.
|
21619
|
-
int32_t last_line = parser->start_line - 1;
|
21620
|
-
const pm_encoding_t *encoding = parser->encoding;
|
21621
|
-
|
21622
|
-
for (size_t index = 0; index < error_list->size; index++) {
|
21623
|
-
pm_error_t *error = &errors[index];
|
21624
|
-
|
21625
|
-
// Here we determine how many lines of padding of the source to display,
|
21626
|
-
// based on the difference from the last line that was displaid.
|
21627
|
-
if (error->line - last_line > 1) {
|
21628
|
-
if (error->line - last_line > 2) {
|
21629
|
-
if ((index != 0) && (error->line - last_line > 3)) {
|
21630
|
-
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
21631
|
-
}
|
21632
|
-
|
21633
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21634
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
21635
|
-
}
|
21636
|
-
|
21637
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21638
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
21639
|
-
}
|
21640
|
-
|
21641
|
-
// If this is the first error or we're on a new line, then we'll display
|
21642
|
-
// the line that has the error in it.
|
21643
|
-
if ((index == 0) || (error->line != last_line)) {
|
21644
|
-
if (colorize) {
|
21645
|
-
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
|
21646
|
-
} else {
|
21647
|
-
pm_buffer_append_string(buffer, "> ", 2);
|
21648
|
-
}
|
21649
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
21650
|
-
}
|
21651
|
-
|
21652
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
21653
|
-
if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
|
21654
|
-
|
21655
|
-
// Now we'll display the actual error message. We'll do this by first
|
21656
|
-
// putting the prefix to the line, then a bunch of blank spaces
|
21657
|
-
// depending on the column, then as many carets as we need to display
|
21658
|
-
// the width of the error, then the error message itself.
|
21659
|
-
//
|
21660
|
-
// Note that this doesn't take into account the width of the actual
|
21661
|
-
// character when displaid in the terminal. For some east-asian
|
21662
|
-
// languages or emoji, this means it can be thrown off pretty badly. We
|
21663
|
-
// will need to solve this eventually.
|
21664
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21665
|
-
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21666
|
-
|
21667
|
-
size_t column = 0;
|
21668
|
-
while (column < error->column_start) {
|
21669
|
-
pm_buffer_append_byte(buffer, ' ');
|
21670
|
-
|
21671
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21672
|
-
column += (char_width == 0 ? 1 : char_width);
|
21673
|
-
}
|
21674
|
-
|
21675
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21676
|
-
pm_buffer_append_byte(buffer, '^');
|
21677
|
-
|
21678
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21679
|
-
column += (char_width == 0 ? 1 : char_width);
|
21680
|
-
|
21681
|
-
while (column < error->column_end) {
|
21682
|
-
pm_buffer_append_byte(buffer, '~');
|
21683
|
-
|
21684
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21685
|
-
column += (char_width == 0 ? 1 : char_width);
|
21686
|
-
}
|
21687
|
-
|
21688
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21689
|
-
|
21690
|
-
if (inline_messages) {
|
21691
|
-
pm_buffer_append_byte(buffer, ' ');
|
21692
|
-
assert(error->error != NULL);
|
21693
|
-
|
21694
|
-
const char *message = error->error->message;
|
21695
|
-
pm_buffer_append_string(buffer, message, strlen(message));
|
21696
|
-
}
|
21697
|
-
|
21698
|
-
pm_buffer_append_byte(buffer, '\n');
|
21699
|
-
|
21700
|
-
// Here we determine how many lines of padding to display after the
|
21701
|
-
// error, depending on where the next error is in source.
|
21702
|
-
last_line = error->line;
|
21703
|
-
int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
|
21704
|
-
|
21705
|
-
if (next_line - last_line > 1) {
|
21706
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21707
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21708
|
-
}
|
21709
|
-
|
21710
|
-
if (next_line - last_line > 1) {
|
21711
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21712
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21713
|
-
}
|
21714
|
-
}
|
21715
|
-
|
21716
|
-
// Finally, we'll free the array of errors that we allocated.
|
21717
|
-
xfree(errors);
|
21718
|
-
}
|
21719
|
-
|
21720
|
-
#undef PM_COLOR_GRAY
|
21721
|
-
#undef PM_COLOR_RED
|
21722
|
-
#undef PM_COLOR_RESET
|