prism 0.29.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +1 -0
- data/config.yml +66 -9
- data/docs/fuzzing.md +1 -1
- data/docs/ripper_translation.md +22 -0
- data/ext/prism/api_node.c +30 -12
- data/ext/prism/extension.c +107 -372
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +138 -70
- data/include/prism/diagnostic.h +7 -2
- data/include/prism/node.h +0 -21
- data/include/prism/parser.h +23 -25
- data/include/prism/regexp.h +17 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +16 -9
- data/include/prism/util/pm_string.h +0 -8
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/dispatcher.rb +14 -0
- data/lib/prism/dot_visitor.rb +22 -3
- data/lib/prism/dsl.rb +7 -2
- data/lib/prism/ffi.rb +24 -3
- data/lib/prism/inspect_visitor.rb +10 -8
- data/lib/prism/mutation_compiler.rb +6 -1
- data/lib/prism/node.rb +166 -241
- data/lib/prism/node_ext.rb +21 -5
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +17 -0
- data/lib/prism/reflection.rb +3 -1
- data/lib/prism/serialize.rb +80 -67
- data/lib/prism/translation/parser/compiler.rb +134 -114
- data/lib/prism/translation/parser.rb +6 -1
- data/lib/prism/translation/ripper.rb +8 -6
- data/lib/prism/translation/ruby_parser.rb +23 -5
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +1 -4
- data/rbi/prism/node.rbi +63 -6
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +6 -0
- data/sig/prism/dsl.rbs +4 -1
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +28 -4
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +21 -0
- data/src/diagnostic.c +27 -17
- data/src/node.c +408 -1666
- data/src/prettyprint.c +49 -6
- data/src/prism.c +958 -991
- data/src/regexp.c +133 -68
- data/src/serialize.c +6 -1
- data/src/static_literals.c +63 -84
- data/src/token_type.c +2 -2
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +39 -11
- data/src/util/pm_string.c +0 -12
- data/src/util/pm_strpbrk.c +32 -6
- metadata +2 -5
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/src/util/pm_string_list.c +0 -28
data/src/prism.c
CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
|
|
423
423
|
* This is the equivalent of IS_lex_state in CRuby.
|
424
424
|
*/
|
425
425
|
static inline bool
|
426
|
-
lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
|
426
|
+
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
|
427
427
|
return parser->lex_state & state;
|
428
428
|
}
|
429
429
|
|
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
708
708
|
.previous = parser->current_scope,
|
709
709
|
.locals = { 0 },
|
710
710
|
.parameters = PM_SCOPE_PARAMETERS_NONE,
|
711
|
-
.
|
711
|
+
.implicit_parameters = { 0 },
|
712
712
|
.shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
|
713
713
|
.closed = closed
|
714
714
|
};
|
@@ -1183,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
|
|
1183
1183
|
return NULL;
|
1184
1184
|
case PM_BEGIN_NODE: {
|
1185
1185
|
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
1186
|
+
|
1187
|
+
if (cast->statements == NULL && cast->ensure_clause != NULL) {
|
1188
|
+
node = (pm_node_t *) cast->ensure_clause;
|
1189
|
+
}
|
1190
|
+
else {
|
1191
|
+
if (cast->rescue_clause != NULL) {
|
1192
|
+
if (cast->rescue_clause->statements == NULL) {
|
1193
|
+
return NULL;
|
1194
|
+
}
|
1195
|
+
else if (cast->else_clause != NULL) {
|
1196
|
+
node = (pm_node_t *) cast->else_clause;
|
1197
|
+
}
|
1198
|
+
else {
|
1199
|
+
node = (pm_node_t *) cast->statements;
|
1200
|
+
}
|
1201
|
+
}
|
1202
|
+
else {
|
1203
|
+
node = (pm_node_t *) cast->statements;
|
1204
|
+
}
|
1205
|
+
}
|
1206
|
+
|
1207
|
+
break;
|
1208
|
+
}
|
1209
|
+
case PM_ENSURE_NODE: {
|
1210
|
+
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
|
1186
1211
|
node = (pm_node_t *) cast->statements;
|
1187
1212
|
break;
|
1188
1213
|
}
|
@@ -1630,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
|
|
1630
1655
|
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
|
1631
1656
|
}
|
1632
1657
|
|
1633
|
-
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
|
1658
|
+
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
|
1634
1659
|
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
|
1635
1660
|
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
|
1636
1661
|
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
|
@@ -2827,8 +2852,7 @@ static pm_call_node_t *
|
|
2827
2852
|
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
|
2828
2853
|
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
2829
2854
|
|
2830
|
-
node->base.location
|
2831
|
-
node->base.location.end = parser->start;
|
2855
|
+
node->base.location = PM_LOCATION_NULL_VALUE(parser);
|
2832
2856
|
node->arguments = arguments;
|
2833
2857
|
|
2834
2858
|
node->name = name;
|
@@ -4291,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4291
4315
|
}
|
4292
4316
|
|
4293
4317
|
/**
|
4294
|
-
* Allocate and initialize a new
|
4318
|
+
* Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
|
4295
4319
|
*/
|
4296
4320
|
static pm_rational_node_t *
|
4297
4321
|
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4301,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4301
4325
|
*node = (pm_rational_node_t) {
|
4302
4326
|
{
|
4303
4327
|
.type = PM_RATIONAL_NODE,
|
4304
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
4328
|
+
.flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
|
4305
4329
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4306
4330
|
},
|
4307
|
-
.
|
4308
|
-
|
4309
|
-
.start = token->start,
|
4310
|
-
.end = token->end - 1
|
4311
|
-
}))
|
4331
|
+
.numerator = { 0 },
|
4332
|
+
.denominator = { 0 }
|
4312
4333
|
};
|
4313
4334
|
|
4335
|
+
const uint8_t *start = token->start;
|
4336
|
+
const uint8_t *end = token->end - 1; // r
|
4337
|
+
|
4338
|
+
while (start < end && *start == '0') start++; // 0.1 -> .1
|
4339
|
+
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
|
4340
|
+
|
4341
|
+
size_t length = (size_t) (end - start);
|
4342
|
+
if (length == 1) {
|
4343
|
+
node->denominator.value = 1;
|
4344
|
+
return node;
|
4345
|
+
}
|
4346
|
+
|
4347
|
+
const uint8_t *point = memchr(start, '.', length);
|
4348
|
+
assert(point && "should have a decimal point");
|
4349
|
+
|
4350
|
+
uint8_t *digits = malloc(length);
|
4351
|
+
if (digits == NULL) {
|
4352
|
+
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
|
4353
|
+
abort();
|
4354
|
+
}
|
4355
|
+
|
4356
|
+
memcpy(digits, start, (unsigned long) (point - start));
|
4357
|
+
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
|
4358
|
+
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
|
4359
|
+
|
4360
|
+
digits[0] = '1';
|
4361
|
+
if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
|
4362
|
+
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
|
4363
|
+
free(digits);
|
4364
|
+
|
4365
|
+
pm_integers_reduce(&node->numerator, &node->denominator);
|
4314
4366
|
return node;
|
4315
4367
|
}
|
4316
4368
|
|
@@ -4621,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
|
|
4621
4673
|
*node = (pm_global_variable_read_node_t) {
|
4622
4674
|
{
|
4623
4675
|
.type = PM_GLOBAL_VARIABLE_READ_NODE,
|
4624
|
-
.location =
|
4676
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4625
4677
|
},
|
4626
4678
|
.name = name
|
4627
4679
|
};
|
@@ -4663,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
|
|
4663
4715
|
*node = (pm_global_variable_write_node_t) {
|
4664
4716
|
{
|
4665
4717
|
.type = PM_GLOBAL_VARIABLE_WRITE_NODE,
|
4666
|
-
.location =
|
4718
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4667
4719
|
},
|
4668
4720
|
.name = name,
|
4669
|
-
.name_loc =
|
4670
|
-
.operator_loc =
|
4721
|
+
.name_loc = PM_LOCATION_NULL_VALUE(parser),
|
4722
|
+
.operator_loc = PM_LOCATION_NULL_VALUE(parser),
|
4671
4723
|
.value = value
|
4672
4724
|
};
|
4673
4725
|
|
@@ -4944,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
|
|
4944
4996
|
}
|
4945
4997
|
|
4946
4998
|
/**
|
4947
|
-
* Allocate and initialize a new
|
4999
|
+
* Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
|
4948
5000
|
* token.
|
4949
5001
|
*/
|
4950
5002
|
static pm_rational_node_t *
|
@@ -4955,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
|
|
4955
5007
|
*node = (pm_rational_node_t) {
|
4956
5008
|
{
|
4957
5009
|
.type = PM_RATIONAL_NODE,
|
4958
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5010
|
+
.flags = base | PM_NODE_FLAG_STATIC_LITERAL,
|
4959
5011
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4960
5012
|
},
|
4961
|
-
.
|
4962
|
-
|
4963
|
-
.start = token->start,
|
4964
|
-
.end = token->end - 1
|
4965
|
-
}))
|
5013
|
+
.numerator = { 0 },
|
5014
|
+
.denominator = { .value = 1, 0 }
|
4966
5015
|
};
|
4967
5016
|
|
5017
|
+
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
|
5018
|
+
switch (base) {
|
5019
|
+
case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
|
5020
|
+
case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
|
5021
|
+
case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
|
5022
|
+
case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
|
5023
|
+
default: assert(false && "unreachable"); break;
|
5024
|
+
}
|
5025
|
+
|
5026
|
+
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
|
5027
|
+
|
4968
5028
|
return node;
|
4969
5029
|
}
|
4970
5030
|
|
@@ -5462,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
|
|
5462
5522
|
node->base.location.end = closing->end;
|
5463
5523
|
}
|
5464
5524
|
|
5525
|
+
/**
|
5526
|
+
* Create a local variable read that is reading the implicit 'it' variable.
|
5527
|
+
*/
|
5528
|
+
static pm_it_local_variable_read_node_t *
|
5529
|
+
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
5530
|
+
pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
|
5531
|
+
|
5532
|
+
*node = (pm_it_local_variable_read_node_t) {
|
5533
|
+
{
|
5534
|
+
.type = PM_IT_LOCAL_VARIABLE_READ_NODE,
|
5535
|
+
.location = PM_LOCATION_TOKEN_VALUE(name)
|
5536
|
+
}
|
5537
|
+
};
|
5538
|
+
|
5539
|
+
return node;
|
5540
|
+
}
|
5541
|
+
|
5465
5542
|
/**
|
5466
5543
|
* Allocate and initialize a new ItParametersNode node.
|
5467
5544
|
*/
|
@@ -5774,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
|
5774
5851
|
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
5775
5852
|
}
|
5776
5853
|
|
5777
|
-
/**
|
5778
|
-
* Returns true if the given node is `it` default parameter.
|
5779
|
-
*/
|
5780
|
-
static inline bool
|
5781
|
-
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
5782
|
-
// Check if it's a local variable reference
|
5783
|
-
if (node->type != PM_CALL_NODE) {
|
5784
|
-
return false;
|
5785
|
-
}
|
5786
|
-
|
5787
|
-
// Check if it's a variable call
|
5788
|
-
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
5789
|
-
if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
|
5790
|
-
return false;
|
5791
|
-
}
|
5792
|
-
|
5793
|
-
// Check if it's called `it`
|
5794
|
-
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
5795
|
-
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
5796
|
-
return pm_token_is_it(constant->start, constant->start + constant->length);
|
5797
|
-
}
|
5798
|
-
|
5799
5854
|
/**
|
5800
5855
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
5801
5856
|
* are of the form /^_\d$/).
|
@@ -7355,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
|
|
7355
7410
|
{
|
7356
7411
|
.type = PM_SYMBOL_NODE,
|
7357
7412
|
.flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
|
7358
|
-
.location =
|
7413
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7359
7414
|
},
|
7360
|
-
.value_loc =
|
7415
|
+
.value_loc = PM_LOCATION_NULL_VALUE(parser),
|
7361
7416
|
.unescaped = { 0 }
|
7362
7417
|
};
|
7363
7418
|
|
@@ -7758,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7758
7813
|
*node = (pm_while_node_t) {
|
7759
7814
|
{
|
7760
7815
|
.type = PM_WHILE_NODE,
|
7761
|
-
.location =
|
7816
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7762
7817
|
},
|
7763
|
-
.keyword_loc =
|
7764
|
-
.closing_loc =
|
7818
|
+
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7819
|
+
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7765
7820
|
.predicate = predicate,
|
7766
7821
|
.statements = statements
|
7767
7822
|
};
|
@@ -7916,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
|
|
7916
7971
|
return constant_id;
|
7917
7972
|
}
|
7918
7973
|
|
7919
|
-
/**
|
7920
|
-
* Create a local variable read that is reading the implicit 'it' variable.
|
7921
|
-
*/
|
7922
|
-
static pm_local_variable_read_node_t *
|
7923
|
-
pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
|
7924
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
|
7925
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
7926
|
-
return NULL;
|
7927
|
-
}
|
7928
|
-
|
7929
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
|
7930
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
7931
|
-
return NULL;
|
7932
|
-
}
|
7933
|
-
|
7934
|
-
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
|
7935
|
-
|
7936
|
-
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
7937
|
-
pm_parser_local_add(parser, name_id, name->start, name->end, 0);
|
7938
|
-
|
7939
|
-
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
|
7940
|
-
}
|
7941
|
-
|
7942
|
-
/**
|
7943
|
-
* Convert a `it` variable call node to a node for `it` default parameter.
|
7944
|
-
*/
|
7945
|
-
static pm_node_t *
|
7946
|
-
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7947
|
-
if (
|
7948
|
-
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7949
|
-
!parser->current_scope->closed &&
|
7950
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7951
|
-
pm_node_is_it(parser, node)
|
7952
|
-
) {
|
7953
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
7954
|
-
|
7955
|
-
if (read != NULL) {
|
7956
|
-
pm_node_destroy(parser, node);
|
7957
|
-
node = (pm_node_t *) read;
|
7958
|
-
}
|
7959
|
-
}
|
7960
|
-
|
7961
|
-
return node;
|
7962
|
-
}
|
7963
|
-
|
7964
7974
|
/**
|
7965
7975
|
* Add a parameter name to the current scope and check whether the name of the
|
7966
7976
|
* parameter is unique or not.
|
@@ -7996,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
7996
8006
|
pm_scope_t *scope = parser->current_scope;
|
7997
8007
|
parser->current_scope = scope->previous;
|
7998
8008
|
pm_locals_free(&scope->locals);
|
8009
|
+
pm_node_list_free(&scope->implicit_parameters);
|
7999
8010
|
xfree(scope);
|
8000
8011
|
}
|
8001
8012
|
|
@@ -8067,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
|
|
8067
8078
|
* is beyond the end of the source then return '\0'.
|
8068
8079
|
*/
|
8069
8080
|
static inline uint8_t
|
8070
|
-
peek_at(pm_parser_t *parser, const uint8_t *cursor) {
|
8081
|
+
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
|
8071
8082
|
if (cursor < parser->end) {
|
8072
8083
|
return *cursor;
|
8073
8084
|
} else {
|
@@ -8090,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
|
|
8090
8101
|
* that position is beyond the end of the source then return '\0'.
|
8091
8102
|
*/
|
8092
8103
|
static inline uint8_t
|
8093
|
-
peek(pm_parser_t *parser) {
|
8104
|
+
peek(const pm_parser_t *parser) {
|
8094
8105
|
return peek_at(parser, parser->current.end);
|
8095
8106
|
}
|
8096
8107
|
|
@@ -8155,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
8155
8166
|
return memchr(cursor, '\n', (size_t) length);
|
8156
8167
|
}
|
8157
8168
|
|
8169
|
+
/**
|
8170
|
+
* This is equivalent to the predicate of warn_balanced in CRuby.
|
8171
|
+
*/
|
8172
|
+
static inline bool
|
8173
|
+
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
|
8174
|
+
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
|
8175
|
+
}
|
8176
|
+
|
8158
8177
|
/**
|
8159
8178
|
* Here we're going to check if this is a "magic" comment, and perform whatever
|
8160
8179
|
* actions are necessary for it here.
|
@@ -8995,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8995
9014
|
// If we get here, then we have a $ followed by something that
|
8996
9015
|
// isn't recognized as a global variable.
|
8997
9016
|
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8998
|
-
|
8999
|
-
|
9017
|
+
const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9018
|
+
PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
|
9000
9019
|
}
|
9001
9020
|
|
9002
9021
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -9389,7 +9408,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
|
9389
9408
|
*/
|
9390
9409
|
static inline uint8_t
|
9391
9410
|
escape_byte(uint8_t value, const uint8_t flags) {
|
9392
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) value &=
|
9411
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
|
9393
9412
|
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
9394
9413
|
return value;
|
9395
9414
|
}
|
@@ -9489,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
|
9489
9508
|
static inline void
|
9490
9509
|
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
|
9491
9510
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9492
|
-
|
9493
|
-
|
9494
|
-
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
9495
|
-
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
9496
|
-
|
9497
|
-
if (byte1 >= 0xA) {
|
9498
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
9499
|
-
} else {
|
9500
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
|
9501
|
-
}
|
9502
|
-
|
9503
|
-
if (byte2 >= 0xA) {
|
9504
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
9505
|
-
} else {
|
9506
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
|
9507
|
-
}
|
9511
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
|
9508
9512
|
}
|
9509
9513
|
|
9510
9514
|
escape_write_byte_encoded(parser, buffer, byte);
|
@@ -9539,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9539
9543
|
switch (peek(parser)) {
|
9540
9544
|
case '\\': {
|
9541
9545
|
parser->current.end++;
|
9542
|
-
|
9546
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
9543
9547
|
return;
|
9544
9548
|
}
|
9545
9549
|
case '\'': {
|
9546
9550
|
parser->current.end++;
|
9547
|
-
|
9551
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
|
9548
9552
|
return;
|
9549
9553
|
}
|
9550
9554
|
case 'a': {
|
9551
9555
|
parser->current.end++;
|
9552
|
-
|
9556
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
|
9553
9557
|
return;
|
9554
9558
|
}
|
9555
9559
|
case 'b': {
|
9556
9560
|
parser->current.end++;
|
9557
|
-
|
9561
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
|
9558
9562
|
return;
|
9559
9563
|
}
|
9560
9564
|
case 'e': {
|
9561
9565
|
parser->current.end++;
|
9562
|
-
|
9566
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
|
9563
9567
|
return;
|
9564
9568
|
}
|
9565
9569
|
case 'f': {
|
9566
9570
|
parser->current.end++;
|
9567
|
-
|
9571
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
|
9568
9572
|
return;
|
9569
9573
|
}
|
9570
9574
|
case 'n': {
|
9571
9575
|
parser->current.end++;
|
9572
|
-
|
9576
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
|
9573
9577
|
return;
|
9574
9578
|
}
|
9575
9579
|
case 'r': {
|
9576
9580
|
parser->current.end++;
|
9577
|
-
|
9581
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
|
9578
9582
|
return;
|
9579
9583
|
}
|
9580
9584
|
case 's': {
|
9581
9585
|
parser->current.end++;
|
9582
|
-
|
9586
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
|
9583
9587
|
return;
|
9584
9588
|
}
|
9585
9589
|
case 't': {
|
9586
9590
|
parser->current.end++;
|
9587
|
-
|
9591
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
|
9588
9592
|
return;
|
9589
9593
|
}
|
9590
9594
|
case 'v': {
|
9591
9595
|
parser->current.end++;
|
9592
|
-
|
9596
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
|
9593
9597
|
return;
|
9594
9598
|
}
|
9595
9599
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
@@ -9606,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9606
9610
|
}
|
9607
9611
|
}
|
9608
9612
|
|
9609
|
-
|
9613
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9610
9614
|
return;
|
9611
9615
|
}
|
9612
9616
|
case 'x': {
|
@@ -9625,11 +9629,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9625
9629
|
parser->current.end++;
|
9626
9630
|
}
|
9627
9631
|
|
9632
|
+
value = escape_byte(value, flags);
|
9628
9633
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9629
|
-
|
9634
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9635
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
|
9636
|
+
} else {
|
9637
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9638
|
+
}
|
9630
9639
|
}
|
9631
9640
|
|
9632
|
-
escape_write_byte_encoded(parser, buffer,
|
9641
|
+
escape_write_byte_encoded(parser, buffer, value);
|
9633
9642
|
} else {
|
9634
9643
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
9635
9644
|
}
|
@@ -9658,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9658
9667
|
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
9659
9668
|
} else if (hexadecimal_length == 0) {
|
9660
9669
|
// there are no hexadecimal characters
|
9661
|
-
pm_parser_err(parser,
|
9670
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9671
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
9662
9672
|
return;
|
9663
9673
|
}
|
9664
9674
|
|
@@ -9707,10 +9717,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9707
9717
|
}
|
9708
9718
|
}
|
9709
9719
|
|
9710
|
-
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9711
|
-
pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9712
|
-
}
|
9713
|
-
|
9714
9720
|
return;
|
9715
9721
|
}
|
9716
9722
|
case 'c': {
|
@@ -9733,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9733
9739
|
return;
|
9734
9740
|
}
|
9735
9741
|
parser->current.end++;
|
9742
|
+
|
9743
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9744
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9745
|
+
return;
|
9746
|
+
}
|
9747
|
+
|
9736
9748
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9737
9749
|
return;
|
9738
9750
|
case ' ':
|
@@ -9760,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9760
9772
|
case 'C': {
|
9761
9773
|
parser->current.end++;
|
9762
9774
|
if (peek(parser) != '-') {
|
9763
|
-
|
9775
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9776
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9764
9777
|
return;
|
9765
9778
|
}
|
9766
9779
|
|
@@ -9783,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9783
9796
|
return;
|
9784
9797
|
}
|
9785
9798
|
parser->current.end++;
|
9799
|
+
|
9800
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9801
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9802
|
+
return;
|
9803
|
+
}
|
9804
|
+
|
9786
9805
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9787
9806
|
return;
|
9788
9807
|
case ' ':
|
@@ -9797,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9797
9816
|
return;
|
9798
9817
|
default: {
|
9799
9818
|
if (!char_is_ascii_printable(peeked)) {
|
9800
|
-
|
9819
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9820
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9801
9821
|
return;
|
9802
9822
|
}
|
9803
9823
|
|
@@ -9810,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9810
9830
|
case 'M': {
|
9811
9831
|
parser->current.end++;
|
9812
9832
|
if (peek(parser) != '-') {
|
9813
|
-
|
9833
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9834
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9814
9835
|
return;
|
9815
9836
|
}
|
9816
9837
|
|
@@ -9828,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9828
9849
|
return;
|
9829
9850
|
}
|
9830
9851
|
parser->current.end++;
|
9852
|
+
|
9853
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9854
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9855
|
+
return;
|
9856
|
+
}
|
9857
|
+
|
9831
9858
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
|
9832
9859
|
return;
|
9833
9860
|
case ' ':
|
@@ -9842,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9842
9869
|
return;
|
9843
9870
|
default:
|
9844
9871
|
if (!char_is_ascii_printable(peeked)) {
|
9845
|
-
|
9872
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9873
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9846
9874
|
return;
|
9847
9875
|
}
|
9848
9876
|
|
@@ -10803,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10803
10831
|
type = PM_TOKEN_USTAR_STAR;
|
10804
10832
|
} else if (lex_state_beg_p(parser)) {
|
10805
10833
|
type = PM_TOKEN_USTAR_STAR;
|
10834
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10835
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
|
10806
10836
|
}
|
10807
10837
|
|
10808
10838
|
if (lex_state_operator_p(parser)) {
|
@@ -10826,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10826
10856
|
type = PM_TOKEN_USTAR;
|
10827
10857
|
} else if (lex_state_beg_p(parser)) {
|
10828
10858
|
type = PM_TOKEN_USTAR;
|
10859
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10860
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
|
10829
10861
|
}
|
10830
10862
|
|
10831
10863
|
if (lex_state_operator_p(parser)) {
|
@@ -10942,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10942
10974
|
// If we have quotes, then we're going to go until we find the
|
10943
10975
|
// end quote.
|
10944
10976
|
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
|
10977
|
+
if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
|
10945
10978
|
parser->current.end++;
|
10946
10979
|
}
|
10947
10980
|
}
|
@@ -10999,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10999
11032
|
LEX(PM_TOKEN_LESS_LESS_EQUAL);
|
11000
11033
|
}
|
11001
11034
|
|
11035
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11036
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
|
11037
|
+
}
|
11038
|
+
|
11002
11039
|
if (lex_state_operator_p(parser)) {
|
11003
11040
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11004
11041
|
} else {
|
@@ -11112,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
|
|
11112
11149
|
type = PM_TOKEN_UAMPERSAND;
|
11113
11150
|
} else if (lex_state_beg_p(parser)) {
|
11114
11151
|
type = PM_TOKEN_UAMPERSAND;
|
11152
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
11153
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
|
11115
11154
|
}
|
11116
11155
|
|
11117
11156
|
if (lex_state_operator_p(parser)) {
|
@@ -11186,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11186
11225
|
LEX(PM_TOKEN_UPLUS);
|
11187
11226
|
}
|
11188
11227
|
|
11228
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11229
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
|
11230
|
+
}
|
11231
|
+
|
11189
11232
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11190
11233
|
LEX(PM_TOKEN_PLUS);
|
11191
11234
|
}
|
@@ -11223,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11223
11266
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
11224
11267
|
}
|
11225
11268
|
|
11269
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11270
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
|
11271
|
+
}
|
11272
|
+
|
11226
11273
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11227
11274
|
LEX(PM_TOKEN_MINUS);
|
11228
11275
|
}
|
@@ -11321,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11321
11368
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
11322
11369
|
}
|
11323
11370
|
|
11371
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11372
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
|
11373
|
+
}
|
11374
|
+
|
11324
11375
|
if (lex_state_operator_p(parser)) {
|
11325
11376
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11326
11377
|
} else {
|
@@ -11356,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11356
11407
|
// operator because we don't want to move into the string
|
11357
11408
|
// lex mode unnecessarily.
|
11358
11409
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
11359
|
-
pm_parser_err_current(parser,
|
11410
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
|
11360
11411
|
LEX(PM_TOKEN_PERCENT);
|
11361
11412
|
}
|
11362
11413
|
|
@@ -11375,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11375
11426
|
|
11376
11427
|
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
11377
11428
|
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
11378
|
-
|
11379
|
-
if (parser->current.end < parser->end) {
|
11380
|
-
LEX(PM_TOKEN_STRING_BEGIN);
|
11381
|
-
}
|
11429
|
+
LEX(PM_TOKEN_STRING_BEGIN);
|
11382
11430
|
}
|
11383
11431
|
|
11384
11432
|
// Delimiters for %-literals cannot be alphanumeric. We
|
@@ -11505,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11505
11553
|
}
|
11506
11554
|
}
|
11507
11555
|
|
11556
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11557
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
|
11558
|
+
}
|
11559
|
+
|
11508
11560
|
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
|
11509
11561
|
LEX(PM_TOKEN_PERCENT);
|
11510
11562
|
}
|
@@ -12315,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
|
|
12315
12367
|
|
12316
12368
|
// If we are immediately following a newline and we have hit the
|
12317
12369
|
// terminator, then we need to return the ending of the heredoc.
|
12318
|
-
if (
|
12370
|
+
if (current_token_starts_line(parser)) {
|
12319
12371
|
const uint8_t *start = parser->current.start;
|
12320
|
-
|
12372
|
+
|
12373
|
+
if (!line_continuation && (start + ident_length <= parser->end)) {
|
12321
12374
|
const uint8_t *newline = next_newline(start, parser->end - start);
|
12322
12375
|
const uint8_t *ident_end = newline;
|
12323
12376
|
const uint8_t *terminator_end = newline;
|
@@ -12473,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
|
|
12473
12526
|
}
|
12474
12527
|
|
12475
12528
|
parser->current.end = breakpoint + 1;
|
12476
|
-
|
12477
|
-
|
12478
|
-
pm_token_buffer_flush(parser, &token_buffer);
|
12479
|
-
LEX(PM_TOKEN_STRING_CONTENT);
|
12480
|
-
}
|
12529
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
12530
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
12481
12531
|
}
|
12482
12532
|
|
12483
12533
|
// Otherwise we hit a newline and it wasn't followed by
|
@@ -13112,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
|
13112
13162
|
return (pm_node_t *) result;
|
13113
13163
|
}
|
13114
13164
|
|
13165
|
+
/**
|
13166
|
+
* When an implicit local variable is written to or targeted, it becomes a
|
13167
|
+
* regular, named local variable. This function removes it from the list of
|
13168
|
+
* implicit parameters when that happens.
|
13169
|
+
*/
|
13170
|
+
static void
|
13171
|
+
parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
|
13172
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
13173
|
+
|
13174
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
13175
|
+
if (implicit_parameters->nodes[index] == node) {
|
13176
|
+
// If the node is not the last one in the list, we need to shift the
|
13177
|
+
// remaining nodes down to fill the gap. This is extremely unlikely
|
13178
|
+
// to happen.
|
13179
|
+
if (index != implicit_parameters->size - 1) {
|
13180
|
+
memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
|
13181
|
+
}
|
13182
|
+
|
13183
|
+
implicit_parameters->size--;
|
13184
|
+
break;
|
13185
|
+
}
|
13186
|
+
}
|
13187
|
+
}
|
13188
|
+
|
13115
13189
|
/**
|
13116
13190
|
* Convert the given node into a valid target node.
|
13191
|
+
*
|
13192
|
+
* @param multiple Whether or not this target is part of a larger set of
|
13193
|
+
* targets. If it is, then the &. operator is not allowed.
|
13194
|
+
* @param splat Whether or not this target is a child of a splat target. If it
|
13195
|
+
* is, then fewer patterns are allowed.
|
13117
13196
|
*/
|
13118
13197
|
static pm_node_t *
|
13119
|
-
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13198
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
|
13120
13199
|
switch (PM_NODE_TYPE(target)) {
|
13121
13200
|
case PM_MISSING_NODE:
|
13122
13201
|
return target;
|
@@ -13162,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13162
13241
|
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
|
13163
13242
|
return target;
|
13164
13243
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13165
|
-
|
13244
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13245
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
|
13246
|
+
parse_target_implicit_parameter(parser, target);
|
13247
|
+
}
|
13166
13248
|
|
13167
13249
|
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
|
13168
13250
|
uint32_t name = cast->name;
|
@@ -13174,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13174
13256
|
|
13175
13257
|
return target;
|
13176
13258
|
}
|
13259
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13260
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13261
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13262
|
+
|
13263
|
+
parse_target_implicit_parameter(parser, target);
|
13264
|
+
pm_node_destroy(parser, target);
|
13265
|
+
|
13266
|
+
return node;
|
13267
|
+
}
|
13177
13268
|
case PM_INSTANCE_VARIABLE_READ_NODE:
|
13178
13269
|
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
|
13179
13270
|
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
|
13180
13271
|
return target;
|
13181
13272
|
case PM_MULTI_TARGET_NODE:
|
13273
|
+
if (splat_parent) {
|
13274
|
+
// Multi target is not accepted in all positions. If this is one
|
13275
|
+
// of them, then we need to add an error.
|
13276
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
13277
|
+
}
|
13278
|
+
|
13182
13279
|
return target;
|
13183
13280
|
case PM_SPLAT_NODE: {
|
13184
13281
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
13185
13282
|
|
13186
13283
|
if (splat->expression != NULL) {
|
13187
|
-
splat->expression = parse_target(parser, splat->expression, multiple);
|
13284
|
+
splat->expression = parse_target(parser, splat->expression, multiple, true);
|
13188
13285
|
}
|
13189
13286
|
|
13190
13287
|
return (pm_node_t *) splat;
|
@@ -13254,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13254
13351
|
*/
|
13255
13352
|
static pm_node_t *
|
13256
13353
|
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13257
|
-
pm_node_t *result = parse_target(parser, target, multiple);
|
13354
|
+
pm_node_t *result = parse_target(parser, target, multiple, false);
|
13258
13355
|
|
13259
|
-
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13356
|
+
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13357
|
+
// parens after the targets.
|
13260
13358
|
if (
|
13261
13359
|
!match1(parser, PM_TOKEN_EQUAL) &&
|
13262
13360
|
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
|
@@ -13326,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13326
13424
|
return (pm_node_t *) node;
|
13327
13425
|
}
|
13328
13426
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13329
|
-
pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
|
13330
13427
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
13331
13428
|
|
13332
13429
|
pm_constant_id_t name = local_read->name;
|
13430
|
+
pm_location_t name_loc = target->location;
|
13431
|
+
|
13333
13432
|
uint32_t depth = local_read->depth;
|
13334
|
-
|
13433
|
+
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
|
13335
13434
|
|
13336
|
-
|
13435
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13436
|
+
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
|
13437
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
|
13438
|
+
parse_target_implicit_parameter(parser, target);
|
13439
|
+
}
|
13440
|
+
|
13441
|
+
pm_locals_unread(&scope->locals, name);
|
13337
13442
|
pm_node_destroy(parser, target);
|
13338
13443
|
|
13339
13444
|
return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
|
13340
13445
|
}
|
13446
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13447
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13448
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
|
13449
|
+
|
13450
|
+
parse_target_implicit_parameter(parser, target);
|
13451
|
+
pm_node_destroy(parser, target);
|
13452
|
+
|
13453
|
+
return node;
|
13454
|
+
}
|
13341
13455
|
case PM_INSTANCE_VARIABLE_READ_NODE: {
|
13342
13456
|
pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
|
13343
13457
|
pm_node_destroy(parser, target);
|
@@ -13491,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13491
13605
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13492
13606
|
|
13493
13607
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13494
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13608
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
|
13495
13609
|
|
13496
13610
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13497
13611
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13507,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13507
13621
|
|
13508
13622
|
if (token_begins_expression_p(parser->current.type)) {
|
13509
13623
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13510
|
-
name = parse_target(parser, name, true);
|
13624
|
+
name = parse_target(parser, name, true, true);
|
13511
13625
|
}
|
13512
13626
|
|
13513
13627
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13515,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13515
13629
|
has_rest = true;
|
13516
13630
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13517
13631
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13518
|
-
target = parse_target(parser, target, true);
|
13632
|
+
target = parse_target(parser, target, true, false);
|
13519
13633
|
|
13520
13634
|
pm_multi_target_node_targets_append(parser, result, target);
|
13521
13635
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13552,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
13552
13666
|
*/
|
13553
13667
|
static pm_statements_node_t *
|
13554
13668
|
parse_statements(pm_parser_t *parser, pm_context_t context) {
|
13555
|
-
// First, skip past any optional terminators that might be at the beginning
|
13556
|
-
// the statements.
|
13669
|
+
// First, skip past any optional terminators that might be at the beginning
|
13670
|
+
// of the statements.
|
13557
13671
|
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
|
13558
13672
|
|
13559
13673
|
// If we have a terminator, then we can just return NULL.
|
@@ -13569,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13569
13683
|
pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
|
13570
13684
|
pm_statements_node_body_append(parser, statements, node);
|
13571
13685
|
|
13572
|
-
// If we're recovering from a syntax error, then we need to stop parsing
|
13573
|
-
// statements now.
|
13686
|
+
// If we're recovering from a syntax error, then we need to stop parsing
|
13687
|
+
// the statements now.
|
13574
13688
|
if (parser->recovering) {
|
13575
|
-
// If this is the level of context where the recovery has happened,
|
13576
|
-
// we can mark the parser as done recovering.
|
13689
|
+
// If this is the level of context where the recovery has happened,
|
13690
|
+
// then we can mark the parser as done recovering.
|
13577
13691
|
if (context_terminator(context, &parser->current)) parser->recovering = false;
|
13578
13692
|
break;
|
13579
13693
|
}
|
13580
13694
|
|
13581
|
-
// If we have a terminator, then we will parse all consecutive
|
13582
|
-
// and then continue parsing the statements list.
|
13695
|
+
// If we have a terminator, then we will parse all consecutive
|
13696
|
+
// terminators and then continue parsing the statements list.
|
13583
13697
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13584
|
-
// If we have a terminator, then we will continue parsing the
|
13585
|
-
// list.
|
13698
|
+
// If we have a terminator, then we will continue parsing the
|
13699
|
+
// statements list.
|
13586
13700
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13587
13701
|
if (context_terminator(context, &parser->current)) break;
|
13588
13702
|
|
@@ -13590,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13590
13704
|
continue;
|
13591
13705
|
}
|
13592
13706
|
|
13593
|
-
// At this point we have a list of statements that are not terminated by
|
13594
|
-
// newline or semicolon. At this point we need to check if we're at
|
13595
|
-
// of the statements list. If we are, then we should break out
|
13707
|
+
// At this point we have a list of statements that are not terminated by
|
13708
|
+
// a newline or semicolon. At this point we need to check if we're at
|
13709
|
+
// the end of the statements list. If we are, then we should break out
|
13710
|
+
// of the loop.
|
13596
13711
|
if (context_terminator(context, &parser->current)) break;
|
13597
13712
|
|
13598
13713
|
// At this point, we have a syntax error, because the statement was not
|
13599
13714
|
// terminated by a newline or semicolon, and we're not at the end of the
|
13600
|
-
// statements list. Ideally we should scan forward to determine if we
|
13601
|
-
// insert a missing terminator or break out of parsing the
|
13602
|
-
// at this point.
|
13715
|
+
// statements list. Ideally we should scan forward to determine if we
|
13716
|
+
// should insert a missing terminator or break out of parsing the
|
13717
|
+
// statements list at this point.
|
13603
13718
|
//
|
13604
|
-
// We don't have that yet, so instead we'll do a more naive approach. If
|
13605
|
-
// were unable to parse an expression, then we will skip past this
|
13606
|
-
// continue parsing the statements list. Otherwise we'll add
|
13607
|
-
// continue parsing the statements list.
|
13719
|
+
// We don't have that yet, so instead we'll do a more naive approach. If
|
13720
|
+
// we were unable to parse an expression, then we will skip past this
|
13721
|
+
// token and continue parsing the statements list. Otherwise we'll add
|
13722
|
+
// an error and continue parsing the statements list.
|
13608
13723
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13609
13724
|
parser_lex(parser);
|
13610
13725
|
|
13611
13726
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13612
13727
|
if (context_terminator(context, &parser->current)) break;
|
13613
|
-
} else if (!
|
13728
|
+
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
13614
13729
|
// This is an inlined version of accept1 because the error that we
|
13615
13730
|
// want to add has varargs. If this happens again, we should
|
13616
13731
|
// probably extract a helper function.
|
@@ -13632,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13632
13747
|
*/
|
13633
13748
|
static void
|
13634
13749
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13635
|
-
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13750
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
|
13636
13751
|
|
13637
13752
|
if (duplicated != NULL) {
|
13638
13753
|
pm_buffer_t buffer = { 0 };
|
@@ -13658,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13658
13773
|
*/
|
13659
13774
|
static void
|
13660
13775
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13661
|
-
|
13776
|
+
pm_node_t *previous;
|
13777
|
+
|
13778
|
+
if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
|
13662
13779
|
pm_diagnostic_list_append_format(
|
13663
13780
|
&parser->warning_list,
|
13664
13781
|
node->location.start,
|
13665
13782
|
node->location.end,
|
13666
13783
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
13667
|
-
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
|
13784
|
+
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
|
13785
|
+
pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
|
13668
13786
|
);
|
13669
13787
|
}
|
13670
13788
|
}
|
@@ -14276,7 +14394,7 @@ parse_parameters(
|
|
14276
14394
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14277
14395
|
|
14278
14396
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
|
14279
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14397
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14280
14398
|
|
14281
14399
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
14282
14400
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
@@ -14289,7 +14407,7 @@ parse_parameters(
|
|
14289
14407
|
// If the value of the parameter increased the number of
|
14290
14408
|
// reads of that parameter, then we need to warn that we
|
14291
14409
|
// have a circular definition.
|
14292
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14410
|
+
if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14293
14411
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
14294
14412
|
}
|
14295
14413
|
|
@@ -14368,10 +14486,10 @@ parse_parameters(
|
|
14368
14486
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14369
14487
|
|
14370
14488
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
|
14371
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14489
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14372
14490
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
14373
14491
|
|
14374
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14492
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14375
14493
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
|
14376
14494
|
}
|
14377
14495
|
|
@@ -14543,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14543
14661
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14544
14662
|
|
14545
14663
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14546
|
-
reference = parse_target(parser, reference, false);
|
14664
|
+
reference = parse_target(parser, reference, false, false);
|
14547
14665
|
|
14548
14666
|
pm_rescue_node_reference_set(rescue, reference);
|
14549
14667
|
break;
|
@@ -14573,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14573
14691
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14574
14692
|
|
14575
14693
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14576
|
-
reference = parse_target(parser, reference, false);
|
14694
|
+
reference = parse_target(parser, reference, false, false);
|
14577
14695
|
|
14578
14696
|
pm_rescue_node_reference_set(rescue, reference);
|
14579
14697
|
break;
|
@@ -14778,6 +14896,28 @@ parse_block_parameters(
|
|
14778
14896
|
return block_parameters;
|
14779
14897
|
}
|
14780
14898
|
|
14899
|
+
/**
|
14900
|
+
* Return true if any of the visible scopes to the current context are using
|
14901
|
+
* numbered parameters.
|
14902
|
+
*/
|
14903
|
+
static bool
|
14904
|
+
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
14905
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14906
|
+
if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
|
14907
|
+
}
|
14908
|
+
|
14909
|
+
return false;
|
14910
|
+
}
|
14911
|
+
|
14912
|
+
/**
|
14913
|
+
* These are the names of the various numbered parameters. We have them here so
|
14914
|
+
* that when we insert them into the constant pool we can use a constant string
|
14915
|
+
* and not have to allocate.
|
14916
|
+
*/
|
14917
|
+
static const char * const pm_numbered_parameter_names[] = {
|
14918
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
14919
|
+
};
|
14920
|
+
|
14781
14921
|
/**
|
14782
14922
|
* Return the node that should be used in the parameters field of a block-like
|
14783
14923
|
* (block or lambda) node, depending on the kind of parameters that were
|
@@ -14785,31 +14925,79 @@ parse_block_parameters(
|
|
14785
14925
|
*/
|
14786
14926
|
static pm_node_t *
|
14787
14927
|
parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
|
14788
|
-
|
14928
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
14929
|
+
|
14930
|
+
// If we have ordinary parameters, then we will return them as the set of
|
14931
|
+
// parameters.
|
14932
|
+
if (parameters != NULL) {
|
14933
|
+
// If we also have implicit parameters, then this is an error.
|
14934
|
+
if (implicit_parameters->size > 0) {
|
14935
|
+
pm_node_t *node = implicit_parameters->nodes[0];
|
14936
|
+
|
14937
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14938
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
|
14939
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14940
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
14941
|
+
} else {
|
14942
|
+
assert(false && "unreachable");
|
14943
|
+
}
|
14944
|
+
}
|
14789
14945
|
|
14790
|
-
if (masked == PM_SCOPE_PARAMETERS_NONE) {
|
14791
|
-
assert(parameters == NULL);
|
14792
|
-
return NULL;
|
14793
|
-
} else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
|
14794
|
-
assert(parameters != NULL);
|
14795
14946
|
return parameters;
|
14796
|
-
}
|
14797
|
-
|
14947
|
+
}
|
14948
|
+
|
14949
|
+
// If we don't have any implicit parameters, then the set of parameters is
|
14950
|
+
// NULL.
|
14951
|
+
if (implicit_parameters->size == 0) {
|
14952
|
+
return NULL;
|
14953
|
+
}
|
14954
|
+
|
14955
|
+
// If we don't have ordinary parameters, then we now must validate our set
|
14956
|
+
// of implicit parameters. We can only have numbered parameters or it, but
|
14957
|
+
// they cannot be mixed.
|
14958
|
+
uint8_t numbered_parameter = 0;
|
14959
|
+
bool it_parameter = false;
|
14960
|
+
|
14961
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
14962
|
+
pm_node_t *node = implicit_parameters->nodes[index];
|
14963
|
+
|
14964
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14965
|
+
if (it_parameter) {
|
14966
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
|
14967
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
14968
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
|
14969
|
+
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
|
14970
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
|
14971
|
+
} else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
14972
|
+
numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
|
14973
|
+
} else {
|
14974
|
+
assert(false && "unreachable");
|
14975
|
+
}
|
14976
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14977
|
+
if (numbered_parameter > 0) {
|
14978
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
14979
|
+
} else {
|
14980
|
+
it_parameter = true;
|
14981
|
+
}
|
14982
|
+
}
|
14983
|
+
}
|
14798
14984
|
|
14799
|
-
|
14800
|
-
|
14801
|
-
|
14802
|
-
|
14985
|
+
if (numbered_parameter > 0) {
|
14986
|
+
// Go through the parent scopes and mark them as being disallowed from
|
14987
|
+
// using numbered parameters because this inner scope is using them.
|
14988
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14989
|
+
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
|
14803
14990
|
}
|
14804
14991
|
|
14805
|
-
|
14806
|
-
|
14807
|
-
|
14992
|
+
const pm_location_t location = { .start = opening->start, .end = closing->end };
|
14993
|
+
return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
|
14994
|
+
}
|
14995
|
+
|
14996
|
+
if (it_parameter) {
|
14808
14997
|
return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
|
14809
|
-
} else {
|
14810
|
-
assert(false && "unreachable");
|
14811
|
-
return NULL;
|
14812
14998
|
}
|
14999
|
+
|
15000
|
+
return NULL;
|
14813
15001
|
}
|
14814
15002
|
|
14815
15003
|
/**
|
@@ -14826,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
|
|
14826
15014
|
pm_block_parameters_node_t *block_parameters = NULL;
|
14827
15015
|
|
14828
15016
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
14829
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
14830
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
14831
|
-
|
14832
15017
|
pm_token_t block_parameters_opening = parser->previous;
|
14833
15018
|
if (match1(parser, PM_TOKEN_PIPE)) {
|
14834
15019
|
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
|
@@ -15326,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
|
|
15326
15511
|
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
|
15327
15512
|
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
|
15328
15513
|
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
|
15329
|
-
case PM_NUMBERED_REFERENCE_READ_NODE
|
15514
|
+
case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
|
15330
15515
|
|
15331
15516
|
// Assert here that the flags are the same so that we can safely switch the type
|
15332
15517
|
// of the node without having to move the flags.
|
@@ -15384,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15384
15569
|
// "aaa #{bbb} #@ccc ddd"
|
15385
15570
|
// ^^^^^^
|
15386
15571
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
15572
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15573
|
+
// even though it is known at parse time.
|
15574
|
+
parser->explicit_encoding = NULL;
|
15575
|
+
|
15387
15576
|
pm_lex_state_t state = parser->lex_state;
|
15388
15577
|
int brace_nesting = parser->brace_nesting;
|
15389
15578
|
|
@@ -15406,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
|
|
15406
15595
|
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
|
15407
15596
|
pm_token_t closing = parser->previous;
|
15408
15597
|
|
15598
|
+
// If this set of embedded statements only contains a single
|
15599
|
+
// statement, then Ruby does not consider it as a possible statement
|
15600
|
+
// that could emit a line event.
|
15601
|
+
if (statements != NULL && statements->body.size == 1) {
|
15602
|
+
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
|
15603
|
+
}
|
15604
|
+
|
15409
15605
|
return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
|
15410
15606
|
}
|
15411
15607
|
|
@@ -15416,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15416
15612
|
// "aaa #{bbb} #@ccc ddd"
|
15417
15613
|
// ^^^^^
|
15418
15614
|
case PM_TOKEN_EMBVAR: {
|
15615
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15616
|
+
// even though it is known at parse time.
|
15617
|
+
parser->explicit_encoding = NULL;
|
15618
|
+
|
15419
15619
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
15420
15620
|
parser_lex(parser);
|
15421
15621
|
|
@@ -15731,74 +15931,43 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15731
15931
|
}
|
15732
15932
|
|
15733
15933
|
/**
|
15734
|
-
*
|
15735
|
-
*
|
15934
|
+
* Parse an identifier into either a local variable read. If the local variable
|
15935
|
+
* is not found, it returns NULL instead.
|
15736
15936
|
*/
|
15737
|
-
static
|
15738
|
-
|
15739
|
-
|
15740
|
-
|
15741
|
-
}
|
15937
|
+
static pm_node_t *
|
15938
|
+
parse_variable(pm_parser_t *parser) {
|
15939
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
15940
|
+
int depth;
|
15742
15941
|
|
15743
|
-
|
15744
|
-
|
15745
|
-
|
15746
|
-
/**
|
15747
|
-
* These are the names of the various numbered parameters. We have them here so
|
15748
|
-
* that when we insert them into the constant pool we can use a constant string
|
15749
|
-
* and not have to allocate.
|
15750
|
-
*/
|
15751
|
-
static const char * const pm_numbered_parameter_names[] = {
|
15752
|
-
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
15753
|
-
};
|
15754
|
-
|
15755
|
-
/**
|
15756
|
-
* Parse an identifier into either a local variable read. If the local variable
|
15757
|
-
* is not found, it returns NULL instead.
|
15758
|
-
*/
|
15759
|
-
static pm_local_variable_read_node_t *
|
15760
|
-
parse_variable(pm_parser_t *parser) {
|
15761
|
-
int depth;
|
15762
|
-
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
15763
|
-
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
15764
|
-
}
|
15942
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
15943
|
+
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
15944
|
+
}
|
15765
15945
|
|
15766
15946
|
pm_scope_t *current_scope = parser->current_scope;
|
15767
|
-
if (!current_scope->closed && current_scope->
|
15768
|
-
|
15769
|
-
|
15770
|
-
|
15771
|
-
|
15772
|
-
|
15773
|
-
|
15774
|
-
|
15775
|
-
|
15776
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
15777
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
15778
|
-
} else {
|
15779
|
-
// Indicate that this scope is using numbered params so that child
|
15780
|
-
// scopes cannot. We subtract the value for the character '0' to get
|
15781
|
-
// the actual integer value of the number (only _1 through _9 are
|
15782
|
-
// valid).
|
15783
|
-
int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
|
15784
|
-
current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
|
15785
|
-
|
15786
|
-
if (numbered_parameters > current_scope->numbered_parameters) {
|
15787
|
-
current_scope->numbered_parameters = numbered_parameters;
|
15947
|
+
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
15948
|
+
if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
15949
|
+
// When you use a numbered parameter, it implies the existence of
|
15950
|
+
// all of the locals that exist before it. For example, referencing
|
15951
|
+
// _2 means that _1 must exist. Therefore here we loop through all
|
15952
|
+
// of the possibilities and add them into the constant pool.
|
15953
|
+
uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
|
15954
|
+
for (uint8_t number = 1; number <= maximum; number++) {
|
15955
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
|
15788
15956
|
}
|
15789
15957
|
|
15790
|
-
|
15791
|
-
|
15792
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
15793
|
-
// loop through all of the possibilities and add them into the
|
15794
|
-
// constant pool.
|
15795
|
-
for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
|
15796
|
-
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
|
15958
|
+
if (!match1(parser, PM_TOKEN_EQUAL)) {
|
15959
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
|
15797
15960
|
}
|
15798
15961
|
|
15799
|
-
|
15800
|
-
|
15801
|
-
|
15962
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
|
15963
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15964
|
+
|
15965
|
+
return node;
|
15966
|
+
} else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
15967
|
+
pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
|
15968
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15969
|
+
|
15970
|
+
return node;
|
15802
15971
|
}
|
15803
15972
|
}
|
15804
15973
|
|
@@ -15813,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
15813
15982
|
pm_node_flags_t flags = 0;
|
15814
15983
|
|
15815
15984
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
15816
|
-
|
15817
|
-
if (node != NULL) return
|
15985
|
+
pm_node_t *node = parse_variable(parser);
|
15986
|
+
if (node != NULL) return node;
|
15818
15987
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
15819
15988
|
}
|
15820
15989
|
|
@@ -15932,6 +16101,230 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15932
16101
|
nodes->size = write_index;
|
15933
16102
|
}
|
15934
16103
|
|
16104
|
+
/**
|
16105
|
+
* Return a string content token at a particular location that is empty.
|
16106
|
+
*/
|
16107
|
+
static pm_token_t
|
16108
|
+
parse_strings_empty_content(const uint8_t *location) {
|
16109
|
+
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16110
|
+
}
|
16111
|
+
|
16112
|
+
/**
|
16113
|
+
* Parse a set of strings that could be concatenated together.
|
16114
|
+
*/
|
16115
|
+
static inline pm_node_t *
|
16116
|
+
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16117
|
+
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16118
|
+
|
16119
|
+
bool concating = false;
|
16120
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16121
|
+
|
16122
|
+
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16123
|
+
pm_node_t *node = NULL;
|
16124
|
+
|
16125
|
+
// Here we have found a string literal. We'll parse it and add it to
|
16126
|
+
// the list of strings.
|
16127
|
+
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16128
|
+
assert(lex_mode->mode == PM_LEX_STRING);
|
16129
|
+
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16130
|
+
|
16131
|
+
pm_token_t opening = parser->current;
|
16132
|
+
parser_lex(parser);
|
16133
|
+
|
16134
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16135
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16136
|
+
// If we get here, then we have an end immediately after a
|
16137
|
+
// start. In that case we'll create an empty content token and
|
16138
|
+
// return an uninterpolated string.
|
16139
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16140
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16141
|
+
|
16142
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16143
|
+
node = (pm_node_t *) string;
|
16144
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16145
|
+
// If we get here, then we have an end of a label immediately
|
16146
|
+
// after a start. In that case we'll create an empty symbol
|
16147
|
+
// node.
|
16148
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16149
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16150
|
+
|
16151
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16152
|
+
node = (pm_node_t *) symbol;
|
16153
|
+
} else if (!lex_interpolation) {
|
16154
|
+
// If we don't accept interpolation then we expect the string to
|
16155
|
+
// start with a single string content node.
|
16156
|
+
pm_string_t unescaped;
|
16157
|
+
pm_token_t content;
|
16158
|
+
|
16159
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
16160
|
+
unescaped = PM_STRING_EMPTY;
|
16161
|
+
content = not_provided(parser);
|
16162
|
+
} else {
|
16163
|
+
unescaped = parser->current_string;
|
16164
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16165
|
+
content = parser->previous;
|
16166
|
+
}
|
16167
|
+
|
16168
|
+
// It is unfortunately possible to have multiple string content
|
16169
|
+
// nodes in a row in the case that there's heredoc content in
|
16170
|
+
// the middle of the string, like this cursed example:
|
16171
|
+
//
|
16172
|
+
// <<-END+'b
|
16173
|
+
// a
|
16174
|
+
// END
|
16175
|
+
// c'+'d'
|
16176
|
+
//
|
16177
|
+
// In that case we need to switch to an interpolated string to
|
16178
|
+
// be able to contain all of the parts.
|
16179
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16180
|
+
pm_node_list_t parts = { 0 };
|
16181
|
+
|
16182
|
+
pm_token_t delimiters = not_provided(parser);
|
16183
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16184
|
+
pm_node_list_append(&parts, part);
|
16185
|
+
|
16186
|
+
do {
|
16187
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16188
|
+
pm_node_list_append(&parts, part);
|
16189
|
+
parser_lex(parser);
|
16190
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16191
|
+
|
16192
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16193
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16194
|
+
|
16195
|
+
pm_node_list_free(&parts);
|
16196
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16197
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16198
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16199
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16200
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16201
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16202
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16203
|
+
} else {
|
16204
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16205
|
+
parser->previous.start = parser->previous.end;
|
16206
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16207
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16208
|
+
}
|
16209
|
+
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16210
|
+
// In this case we've hit string content so we know the string
|
16211
|
+
// at least has something in it. We'll need to check if the
|
16212
|
+
// following token is the end (in which case we can return a
|
16213
|
+
// plain string) or if it's not then it has interpolation.
|
16214
|
+
pm_token_t content = parser->current;
|
16215
|
+
pm_string_t unescaped = parser->current_string;
|
16216
|
+
parser_lex(parser);
|
16217
|
+
|
16218
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16219
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16220
|
+
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16221
|
+
|
16222
|
+
// Kind of odd behavior, but basically if we have an
|
16223
|
+
// unterminated string and it ends in a newline, we back up one
|
16224
|
+
// character so that the error message is on the last line of
|
16225
|
+
// content in the string.
|
16226
|
+
if (!accept1(parser, PM_TOKEN_STRING_END)) {
|
16227
|
+
const uint8_t *location = parser->previous.end;
|
16228
|
+
if (location > parser->start && location[-1] == '\n') location--;
|
16229
|
+
pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
|
16230
|
+
|
16231
|
+
parser->previous.start = parser->previous.end;
|
16232
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16233
|
+
}
|
16234
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16235
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16236
|
+
} else {
|
16237
|
+
// If we get here, then we have interpolation so we'll need
|
16238
|
+
// to create a string or symbol node with interpolation.
|
16239
|
+
pm_node_list_t parts = { 0 };
|
16240
|
+
pm_token_t string_opening = not_provided(parser);
|
16241
|
+
pm_token_t string_closing = not_provided(parser);
|
16242
|
+
|
16243
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16244
|
+
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16245
|
+
pm_node_list_append(&parts, part);
|
16246
|
+
|
16247
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16248
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16249
|
+
pm_node_list_append(&parts, part);
|
16250
|
+
}
|
16251
|
+
}
|
16252
|
+
|
16253
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16254
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16255
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16256
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16257
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16258
|
+
} else {
|
16259
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16260
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16261
|
+
}
|
16262
|
+
|
16263
|
+
pm_node_list_free(&parts);
|
16264
|
+
}
|
16265
|
+
} else {
|
16266
|
+
// If we get here, then the first part of the string is not plain
|
16267
|
+
// string content, in which case we need to parse the string as an
|
16268
|
+
// interpolated string.
|
16269
|
+
pm_node_list_t parts = { 0 };
|
16270
|
+
pm_node_t *part;
|
16271
|
+
|
16272
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16273
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16274
|
+
pm_node_list_append(&parts, part);
|
16275
|
+
}
|
16276
|
+
}
|
16277
|
+
|
16278
|
+
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16279
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16280
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16281
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16282
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16283
|
+
} else {
|
16284
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16285
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16286
|
+
}
|
16287
|
+
|
16288
|
+
pm_node_list_free(&parts);
|
16289
|
+
}
|
16290
|
+
|
16291
|
+
if (current == NULL) {
|
16292
|
+
// If the node we just parsed is a symbol node, then we can't
|
16293
|
+
// concatenate it with anything else, so we can now return that
|
16294
|
+
// node.
|
16295
|
+
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16296
|
+
return node;
|
16297
|
+
}
|
16298
|
+
|
16299
|
+
// If we don't already have a node, then it's fine and we can just
|
16300
|
+
// set the result to be the node we just parsed.
|
16301
|
+
current = node;
|
16302
|
+
} else {
|
16303
|
+
// Otherwise we need to check the type of the node we just parsed.
|
16304
|
+
// If it cannot be concatenated with the previous node, then we'll
|
16305
|
+
// need to add a syntax error.
|
16306
|
+
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16307
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16308
|
+
}
|
16309
|
+
|
16310
|
+
// If we haven't already created our container for concatenation,
|
16311
|
+
// we'll do that now.
|
16312
|
+
if (!concating) {
|
16313
|
+
concating = true;
|
16314
|
+
pm_token_t bounds = not_provided(parser);
|
16315
|
+
|
16316
|
+
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16317
|
+
pm_interpolated_string_node_append(container, current);
|
16318
|
+
current = (pm_node_t *) container;
|
16319
|
+
}
|
16320
|
+
|
16321
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16322
|
+
}
|
16323
|
+
}
|
16324
|
+
|
16325
|
+
return current;
|
16326
|
+
}
|
16327
|
+
|
15935
16328
|
#define PM_PARSE_PATTERN_SINGLE 0
|
15936
16329
|
#define PM_PARSE_PATTERN_TOP 1
|
15937
16330
|
#define PM_PARSE_PATTERN_MULTI 2
|
@@ -16214,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
16214
16607
|
*/
|
16215
16608
|
static void
|
16216
16609
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
16217
|
-
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
16610
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
|
16218
16611
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
16219
16612
|
}
|
16220
16613
|
}
|
@@ -16289,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16289
16682
|
pm_node_list_append(&assocs, assoc);
|
16290
16683
|
}
|
16291
16684
|
} else {
|
16292
|
-
|
16293
|
-
|
16685
|
+
pm_node_t *key;
|
16686
|
+
|
16687
|
+
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16688
|
+
key = parse_strings(parser, NULL);
|
16689
|
+
|
16690
|
+
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16691
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
|
16692
|
+
} else if (!pm_symbol_node_label_p(key)) {
|
16693
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16694
|
+
}
|
16695
|
+
} else {
|
16696
|
+
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16697
|
+
key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16698
|
+
}
|
16294
16699
|
|
16295
16700
|
parse_pattern_hash_key(parser, &keys, key);
|
16296
16701
|
pm_node_t *value = NULL;
|
@@ -16502,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16502
16907
|
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
16503
16908
|
|
16504
16909
|
if (variable == NULL) {
|
16505
|
-
|
16506
|
-
|
16507
|
-
!parser->current_scope->closed &&
|
16508
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
16509
|
-
pm_token_is_it(parser->previous.start, parser->previous.end)
|
16510
|
-
) {
|
16511
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
16512
|
-
if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
16513
|
-
variable = (pm_node_t *) read;
|
16514
|
-
} else {
|
16515
|
-
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16516
|
-
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16517
|
-
}
|
16910
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16911
|
+
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16518
16912
|
}
|
16519
16913
|
|
16520
16914
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
@@ -16762,276 +17156,67 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
16762
17156
|
}
|
16763
17157
|
|
16764
17158
|
trailing_rest = true;
|
16765
|
-
} else {
|
16766
|
-
node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
|
16767
|
-
}
|
16768
|
-
|
16769
|
-
pm_node_list_append(&nodes, node);
|
16770
|
-
}
|
16771
|
-
|
16772
|
-
// If the first pattern and the last pattern are rest patterns, then we will
|
16773
|
-
// call this a find pattern, regardless of how many rest patterns are in
|
16774
|
-
// between because we know we already added the appropriate errors.
|
16775
|
-
// Otherwise we will create an array pattern.
|
16776
|
-
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
16777
|
-
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
16778
|
-
} else {
|
16779
|
-
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
16780
|
-
}
|
16781
|
-
|
16782
|
-
xfree(nodes.nodes);
|
16783
|
-
} else if (leading_rest) {
|
16784
|
-
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
16785
|
-
// array pattern, so we can go ahead and create that node.
|
16786
|
-
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
16787
|
-
}
|
16788
|
-
|
16789
|
-
return node;
|
16790
|
-
}
|
16791
|
-
|
16792
|
-
/**
|
16793
|
-
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
16794
|
-
* from its start bounds. If it's a compound node, then we will recursively
|
16795
|
-
* apply this function to its value.
|
16796
|
-
*/
|
16797
|
-
static inline void
|
16798
|
-
parse_negative_numeric(pm_node_t *node) {
|
16799
|
-
switch (PM_NODE_TYPE(node)) {
|
16800
|
-
case PM_INTEGER_NODE: {
|
16801
|
-
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
16802
|
-
cast->base.location.start--;
|
16803
|
-
cast->value.negative = true;
|
16804
|
-
break;
|
16805
|
-
}
|
16806
|
-
case PM_FLOAT_NODE: {
|
16807
|
-
pm_float_node_t *cast = (pm_float_node_t *) node;
|
16808
|
-
cast->base.location.start--;
|
16809
|
-
cast->value = -cast->value;
|
16810
|
-
break;
|
16811
|
-
}
|
16812
|
-
case PM_RATIONAL_NODE:
|
16813
|
-
node->location.start--;
|
16814
|
-
parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
|
16815
|
-
break;
|
16816
|
-
case PM_IMAGINARY_NODE:
|
16817
|
-
node->location.start--;
|
16818
|
-
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
16819
|
-
break;
|
16820
|
-
default:
|
16821
|
-
assert(false && "unreachable");
|
16822
|
-
break;
|
16823
|
-
}
|
16824
|
-
}
|
16825
|
-
|
16826
|
-
/**
|
16827
|
-
* Return a string content token at a particular location that is empty.
|
16828
|
-
*/
|
16829
|
-
static pm_token_t
|
16830
|
-
parse_strings_empty_content(const uint8_t *location) {
|
16831
|
-
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16832
|
-
}
|
16833
|
-
|
16834
|
-
/**
|
16835
|
-
* Parse a set of strings that could be concatenated together.
|
16836
|
-
*/
|
16837
|
-
static inline pm_node_t *
|
16838
|
-
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16839
|
-
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16840
|
-
|
16841
|
-
bool concating = false;
|
16842
|
-
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16843
|
-
|
16844
|
-
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16845
|
-
pm_node_t *node = NULL;
|
16846
|
-
|
16847
|
-
// Here we have found a string literal. We'll parse it and add it to
|
16848
|
-
// the list of strings.
|
16849
|
-
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16850
|
-
assert(lex_mode->mode == PM_LEX_STRING);
|
16851
|
-
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16852
|
-
|
16853
|
-
pm_token_t opening = parser->current;
|
16854
|
-
parser_lex(parser);
|
16855
|
-
|
16856
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16857
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16858
|
-
// If we get here, then we have an end immediately after a
|
16859
|
-
// start. In that case we'll create an empty content token and
|
16860
|
-
// return an uninterpolated string.
|
16861
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16862
|
-
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16863
|
-
|
16864
|
-
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16865
|
-
node = (pm_node_t *) string;
|
16866
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16867
|
-
// If we get here, then we have an end of a label immediately
|
16868
|
-
// after a start. In that case we'll create an empty symbol
|
16869
|
-
// node.
|
16870
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16871
|
-
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16872
|
-
|
16873
|
-
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16874
|
-
node = (pm_node_t *) symbol;
|
16875
|
-
} else if (!lex_interpolation) {
|
16876
|
-
// If we don't accept interpolation then we expect the string to
|
16877
|
-
// start with a single string content node.
|
16878
|
-
pm_string_t unescaped;
|
16879
|
-
pm_token_t content;
|
16880
|
-
if (match1(parser, PM_TOKEN_EOF)) {
|
16881
|
-
unescaped = PM_STRING_EMPTY;
|
16882
|
-
content = not_provided(parser);
|
16883
|
-
} else {
|
16884
|
-
unescaped = parser->current_string;
|
16885
|
-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16886
|
-
content = parser->previous;
|
16887
|
-
}
|
16888
|
-
|
16889
|
-
// It is unfortunately possible to have multiple string content
|
16890
|
-
// nodes in a row in the case that there's heredoc content in
|
16891
|
-
// the middle of the string, like this cursed example:
|
16892
|
-
//
|
16893
|
-
// <<-END+'b
|
16894
|
-
// a
|
16895
|
-
// END
|
16896
|
-
// c'+'d'
|
16897
|
-
//
|
16898
|
-
// In that case we need to switch to an interpolated string to
|
16899
|
-
// be able to contain all of the parts.
|
16900
|
-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16901
|
-
pm_node_list_t parts = { 0 };
|
16902
|
-
|
16903
|
-
pm_token_t delimiters = not_provided(parser);
|
16904
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16905
|
-
pm_node_list_append(&parts, part);
|
16906
|
-
|
16907
|
-
do {
|
16908
|
-
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16909
|
-
pm_node_list_append(&parts, part);
|
16910
|
-
parser_lex(parser);
|
16911
|
-
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16912
|
-
|
16913
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16914
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16915
|
-
|
16916
|
-
pm_node_list_free(&parts);
|
16917
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16918
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16919
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16920
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16921
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16922
|
-
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16923
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16924
|
-
} else {
|
16925
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16926
|
-
parser->previous.start = parser->previous.end;
|
16927
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
16928
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16929
|
-
}
|
16930
|
-
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16931
|
-
// In this case we've hit string content so we know the string
|
16932
|
-
// at least has something in it. We'll need to check if the
|
16933
|
-
// following token is the end (in which case we can return a
|
16934
|
-
// plain string) or if it's not then it has interpolation.
|
16935
|
-
pm_token_t content = parser->current;
|
16936
|
-
pm_string_t unescaped = parser->current_string;
|
16937
|
-
parser_lex(parser);
|
16938
|
-
|
16939
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16940
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16941
|
-
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16942
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16943
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16944
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16945
|
-
} else {
|
16946
|
-
// If we get here, then we have interpolation so we'll need
|
16947
|
-
// to create a string or symbol node with interpolation.
|
16948
|
-
pm_node_list_t parts = { 0 };
|
16949
|
-
pm_token_t string_opening = not_provided(parser);
|
16950
|
-
pm_token_t string_closing = not_provided(parser);
|
16951
|
-
|
16952
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16953
|
-
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16954
|
-
pm_node_list_append(&parts, part);
|
16955
|
-
|
16956
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16957
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16958
|
-
pm_node_list_append(&parts, part);
|
16959
|
-
}
|
16960
|
-
}
|
16961
|
-
|
16962
|
-
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16963
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16964
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16965
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16966
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16967
|
-
} else {
|
16968
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16969
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16970
|
-
}
|
16971
|
-
|
16972
|
-
pm_node_list_free(&parts);
|
16973
|
-
}
|
16974
|
-
} else {
|
16975
|
-
// If we get here, then the first part of the string is not plain
|
16976
|
-
// string content, in which case we need to parse the string as an
|
16977
|
-
// interpolated string.
|
16978
|
-
pm_node_list_t parts = { 0 };
|
16979
|
-
pm_node_t *part;
|
16980
|
-
|
16981
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16982
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16983
|
-
pm_node_list_append(&parts, part);
|
16984
|
-
}
|
16985
|
-
}
|
16986
|
-
|
16987
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16988
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16989
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16990
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16991
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16992
|
-
} else {
|
16993
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16994
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
17159
|
+
} else {
|
17160
|
+
node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
|
16995
17161
|
}
|
16996
17162
|
|
16997
|
-
|
17163
|
+
pm_node_list_append(&nodes, node);
|
16998
17164
|
}
|
16999
17165
|
|
17000
|
-
|
17001
|
-
|
17002
|
-
|
17003
|
-
|
17004
|
-
|
17005
|
-
|
17006
|
-
}
|
17007
|
-
|
17008
|
-
// If we don't already have a node, then it's fine and we can just
|
17009
|
-
// set the result to be the node we just parsed.
|
17010
|
-
current = node;
|
17166
|
+
// If the first pattern and the last pattern are rest patterns, then we will
|
17167
|
+
// call this a find pattern, regardless of how many rest patterns are in
|
17168
|
+
// between because we know we already added the appropriate errors.
|
17169
|
+
// Otherwise we will create an array pattern.
|
17170
|
+
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
17171
|
+
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
17011
17172
|
} else {
|
17012
|
-
|
17013
|
-
|
17014
|
-
// need to add a syntax error.
|
17015
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
17016
|
-
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
17017
|
-
}
|
17173
|
+
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
17174
|
+
}
|
17018
17175
|
|
17019
|
-
|
17020
|
-
|
17021
|
-
|
17022
|
-
|
17023
|
-
|
17176
|
+
xfree(nodes.nodes);
|
17177
|
+
} else if (leading_rest) {
|
17178
|
+
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
17179
|
+
// array pattern, so we can go ahead and create that node.
|
17180
|
+
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
17181
|
+
}
|
17024
17182
|
|
17025
|
-
|
17026
|
-
|
17027
|
-
current = (pm_node_t *) container;
|
17028
|
-
}
|
17183
|
+
return node;
|
17184
|
+
}
|
17029
17185
|
|
17030
|
-
|
17186
|
+
/**
|
17187
|
+
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
17188
|
+
* from its start bounds. If it's a compound node, then we will recursively
|
17189
|
+
* apply this function to its value.
|
17190
|
+
*/
|
17191
|
+
static inline void
|
17192
|
+
parse_negative_numeric(pm_node_t *node) {
|
17193
|
+
switch (PM_NODE_TYPE(node)) {
|
17194
|
+
case PM_INTEGER_NODE: {
|
17195
|
+
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
17196
|
+
cast->base.location.start--;
|
17197
|
+
cast->value.negative = true;
|
17198
|
+
break;
|
17199
|
+
}
|
17200
|
+
case PM_FLOAT_NODE: {
|
17201
|
+
pm_float_node_t *cast = (pm_float_node_t *) node;
|
17202
|
+
cast->base.location.start--;
|
17203
|
+
cast->value = -cast->value;
|
17204
|
+
break;
|
17205
|
+
}
|
17206
|
+
case PM_RATIONAL_NODE: {
|
17207
|
+
pm_rational_node_t *cast = (pm_rational_node_t *) node;
|
17208
|
+
cast->base.location.start--;
|
17209
|
+
cast->numerator.negative = true;
|
17210
|
+
break;
|
17031
17211
|
}
|
17212
|
+
case PM_IMAGINARY_NODE:
|
17213
|
+
node->location.start--;
|
17214
|
+
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
17215
|
+
break;
|
17216
|
+
default:
|
17217
|
+
assert(false && "unreachable");
|
17218
|
+
break;
|
17032
17219
|
}
|
17033
|
-
|
17034
|
-
return current;
|
17035
17220
|
}
|
17036
17221
|
|
17037
17222
|
/**
|
@@ -17229,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17229
17414
|
}
|
17230
17415
|
}
|
17231
17416
|
|
17417
|
+
/**
|
17418
|
+
* This struct is used to pass information between the regular expression parser
|
17419
|
+
* and the error callback.
|
17420
|
+
*/
|
17421
|
+
typedef struct {
|
17422
|
+
/** The parser that we are parsing the regular expression for. */
|
17423
|
+
pm_parser_t *parser;
|
17424
|
+
|
17425
|
+
/** The start of the regular expression. */
|
17426
|
+
const uint8_t *start;
|
17427
|
+
|
17428
|
+
/** The end of the regular expression. */
|
17429
|
+
const uint8_t *end;
|
17430
|
+
|
17431
|
+
/**
|
17432
|
+
* Whether or not the source of the regular expression is shared. This
|
17433
|
+
* impacts the location of error messages, because if it is shared then we
|
17434
|
+
* can use the location directly and if it is not, then we use the bounds of
|
17435
|
+
* the regular expression itself.
|
17436
|
+
*/
|
17437
|
+
bool shared;
|
17438
|
+
} parse_regular_expression_error_data_t;
|
17439
|
+
|
17440
|
+
/**
|
17441
|
+
* This callback is called when the regular expression parser encounters a
|
17442
|
+
* syntax error.
|
17443
|
+
*/
|
17444
|
+
static void
|
17445
|
+
parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
|
17446
|
+
parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
|
17447
|
+
pm_location_t location;
|
17448
|
+
|
17449
|
+
if (callback_data->shared) {
|
17450
|
+
location = (pm_location_t) { .start = start, .end = end };
|
17451
|
+
} else {
|
17452
|
+
location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
|
17453
|
+
}
|
17454
|
+
|
17455
|
+
PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
|
17456
|
+
}
|
17457
|
+
|
17458
|
+
/**
|
17459
|
+
* Parse the errors for the regular expression and add them to the parser.
|
17460
|
+
*/
|
17461
|
+
static void
|
17462
|
+
parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
|
17463
|
+
const pm_string_t *unescaped = &node->unescaped;
|
17464
|
+
parse_regular_expression_error_data_t error_data = {
|
17465
|
+
.parser = parser,
|
17466
|
+
.start = node->base.location.start,
|
17467
|
+
.end = node->base.location.end,
|
17468
|
+
.shared = unescaped->type == PM_STRING_SHARED
|
17469
|
+
};
|
17470
|
+
|
17471
|
+
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
|
17472
|
+
}
|
17473
|
+
|
17232
17474
|
/**
|
17233
17475
|
* Parse an expression that begins with the previous node that we just lexed.
|
17234
17476
|
*/
|
@@ -17249,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17249
17491
|
break;
|
17250
17492
|
}
|
17251
17493
|
|
17252
|
-
|
17253
|
-
|
17494
|
+
// Ensure that we have a comma between elements in the array.
|
17495
|
+
if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
|
17496
|
+
const uint8_t *location = parser->previous.end;
|
17497
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17498
|
+
|
17499
|
+
parser->previous.start = location;
|
17500
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
17254
17501
|
}
|
17255
17502
|
|
17256
17503
|
// If we have a right bracket immediately following a comma,
|
@@ -17428,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17428
17675
|
|
17429
17676
|
// If we didn't find a terminator and we didn't find a right
|
17430
17677
|
// parenthesis, then this is a syntax error.
|
17431
|
-
if (!terminator_found) {
|
17678
|
+
if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
|
17432
17679
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17433
17680
|
}
|
17434
17681
|
|
@@ -17457,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17457
17704
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
|
17458
17705
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
17459
17706
|
break;
|
17460
|
-
} else {
|
17707
|
+
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
17708
|
+
// If we're at the end of the file, then we're going to add
|
17709
|
+
// an error after this for the ) anyway.
|
17461
17710
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17462
17711
|
}
|
17463
17712
|
}
|
@@ -17676,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17676
17925
|
) {
|
17677
17926
|
pm_arguments_t arguments = { 0 };
|
17678
17927
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
17679
|
-
|
17680
17928
|
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
|
17929
|
+
|
17930
|
+
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
17931
|
+
// If we're about to convert an 'it' implicit local
|
17932
|
+
// variable read into a method call, we need to remove
|
17933
|
+
// it from the list of implicit local variables.
|
17934
|
+
parse_target_implicit_parameter(parser, node);
|
17935
|
+
} else {
|
17936
|
+
// Otherwise, we're about to convert a regular local
|
17937
|
+
// variable read into a method call, in which case we
|
17938
|
+
// need to indicate that this was not a read for the
|
17939
|
+
// purposes of warnings.
|
17940
|
+
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
|
17941
|
+
|
17942
|
+
if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
|
17943
|
+
parse_target_implicit_parameter(parser, node);
|
17944
|
+
} else {
|
17945
|
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
17946
|
+
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
|
17947
|
+
}
|
17948
|
+
}
|
17949
|
+
|
17681
17950
|
pm_node_destroy(parser, node);
|
17682
17951
|
return (pm_node_t *) fcall;
|
17683
17952
|
}
|
@@ -17685,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17685
17954
|
|
17686
17955
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17687
17956
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
17688
|
-
} else {
|
17689
|
-
// Check if `it` is not going to be assigned.
|
17690
|
-
switch (parser->current.type) {
|
17691
|
-
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
17692
|
-
case PM_TOKEN_AMPERSAND_EQUAL:
|
17693
|
-
case PM_TOKEN_CARET_EQUAL:
|
17694
|
-
case PM_TOKEN_EQUAL:
|
17695
|
-
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
17696
|
-
case PM_TOKEN_LESS_LESS_EQUAL:
|
17697
|
-
case PM_TOKEN_MINUS_EQUAL:
|
17698
|
-
case PM_TOKEN_PARENTHESIS_RIGHT:
|
17699
|
-
case PM_TOKEN_PERCENT_EQUAL:
|
17700
|
-
case PM_TOKEN_PIPE_EQUAL:
|
17701
|
-
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
17702
|
-
case PM_TOKEN_PLUS_EQUAL:
|
17703
|
-
case PM_TOKEN_SLASH_EQUAL:
|
17704
|
-
case PM_TOKEN_STAR_EQUAL:
|
17705
|
-
case PM_TOKEN_STAR_STAR_EQUAL:
|
17706
|
-
break;
|
17707
|
-
default:
|
17708
|
-
// Once we know it's neither a method call nor an
|
17709
|
-
// assignment, we can finally create `it` default
|
17710
|
-
// parameter.
|
17711
|
-
node = pm_node_check_it(parser, node);
|
17712
|
-
}
|
17713
17957
|
}
|
17714
17958
|
|
17715
17959
|
return node;
|
@@ -17970,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17970
18214
|
// as frozen because when clause strings are frozen.
|
17971
18215
|
if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
|
17972
18216
|
pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
|
18217
|
+
} else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
|
18218
|
+
pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
|
17973
18219
|
}
|
17974
18220
|
|
17975
18221
|
pm_when_clause_static_literals_add(parser, &literals, condition);
|
@@ -18375,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18375
18621
|
|
18376
18622
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
18377
18623
|
receiver = parse_variable_call(parser);
|
18378
|
-
receiver = pm_node_check_it(parser, receiver);
|
18379
18624
|
|
18380
18625
|
pm_parser_scope_push(parser, true);
|
18381
18626
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
@@ -18712,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18712
18957
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18713
18958
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18714
18959
|
} else {
|
18715
|
-
index = parse_target(parser, index, false);
|
18960
|
+
index = parse_target(parser, index, false, false);
|
18716
18961
|
}
|
18717
18962
|
|
18718
18963
|
context_pop(parser);
|
@@ -19347,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19347
19592
|
bool ascii_only = parser->current_regular_expression_ascii_only;
|
19348
19593
|
parser_lex(parser);
|
19349
19594
|
|
19350
|
-
// If we hit an end, then we can create a regular expression
|
19351
|
-
// without interpolation, which can be represented more
|
19352
|
-
// more easily compiled.
|
19595
|
+
// If we hit an end, then we can create a regular expression
|
19596
|
+
// node without interpolation, which can be represented more
|
19597
|
+
// succinctly and more easily compiled.
|
19353
19598
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
19354
|
-
|
19355
|
-
|
19356
|
-
|
19599
|
+
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
19600
|
+
|
19601
|
+
// If we're not immediately followed by a =~, then we want
|
19602
|
+
// to parse all of the errors at this point. If it is
|
19603
|
+
// followed by a =~, then it will get parsed higher up while
|
19604
|
+
// parsing the named captures as well.
|
19605
|
+
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
|
19606
|
+
parse_regular_expression_errors(parser, node);
|
19607
|
+
}
|
19608
|
+
|
19609
|
+
pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
|
19610
|
+
return (pm_node_t *) node;
|
19357
19611
|
}
|
19358
19612
|
|
19359
19613
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -19571,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19571
19825
|
|
19572
19826
|
switch (parser->current.type) {
|
19573
19827
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
19574
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19575
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19576
|
-
|
19577
19828
|
pm_token_t opening = parser->current;
|
19578
19829
|
parser_lex(parser);
|
19579
19830
|
|
@@ -19590,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19590
19841
|
break;
|
19591
19842
|
}
|
19592
19843
|
case PM_CASE_PARAMETER: {
|
19593
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19594
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19595
|
-
|
19596
19844
|
pm_accepts_block_stack_push(parser, false);
|
19597
19845
|
pm_token_t opening = not_provided(parser);
|
19598
19846
|
block_parameters = parse_block_parameters(parser, false, &opening, true);
|
@@ -19845,89 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19845
20093
|
}
|
19846
20094
|
|
19847
20095
|
/**
|
19848
|
-
*
|
19849
|
-
*
|
20096
|
+
* This struct is used to pass information between the regular expression parser
|
20097
|
+
* and the named capture callback.
|
19850
20098
|
*/
|
19851
|
-
|
19852
|
-
|
19853
|
-
|
19854
|
-
pm_node_t *result;
|
20099
|
+
typedef struct {
|
20100
|
+
/** The parser that is parsing the regular expression. */
|
20101
|
+
pm_parser_t *parser;
|
19855
20102
|
|
19856
|
-
|
19857
|
-
|
19858
|
-
// are invalid, creating a MatchWriteNode is delayed here.
|
19859
|
-
pm_match_write_node_t *match = NULL;
|
19860
|
-
pm_constant_id_list_t names = { 0 };
|
20103
|
+
/** The call node wrapping the regular expression node. */
|
20104
|
+
pm_call_node_t *call;
|
19861
20105
|
|
19862
|
-
|
19863
|
-
|
20106
|
+
/** The match write node that is being created. */
|
20107
|
+
pm_match_write_node_t *match;
|
19864
20108
|
|
19865
|
-
|
19866
|
-
|
20109
|
+
/** The list of names that have been parsed. */
|
20110
|
+
pm_constant_id_list_t names;
|
19867
20111
|
|
19868
|
-
|
19869
|
-
|
20112
|
+
/**
|
20113
|
+
* Whether the content of the regular expression is shared. This impacts
|
20114
|
+
* whether or not we use owned constants or shared constants in the
|
20115
|
+
* constant pool for the names of the captures.
|
20116
|
+
*/
|
20117
|
+
bool shared;
|
20118
|
+
} parse_regular_expression_named_capture_data_t;
|
19870
20119
|
|
19871
|
-
|
19872
|
-
|
19873
|
-
|
20120
|
+
/**
|
20121
|
+
* This callback is called when the regular expression parser encounters a named
|
20122
|
+
* capture group.
|
20123
|
+
*/
|
20124
|
+
static void
|
20125
|
+
parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
20126
|
+
parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
|
19874
20127
|
|
19875
|
-
|
19876
|
-
|
19877
|
-
|
19878
|
-
location = (pm_location_t) { .start = source, .end = source + length };
|
19879
|
-
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
19880
|
-
} else {
|
19881
|
-
// Otherwise, the name is a slice of the malloc-ed owned string,
|
19882
|
-
// in which case we need to copy it out into a new string.
|
19883
|
-
location = call->receiver->location;
|
20128
|
+
pm_parser_t *parser = callback_data->parser;
|
20129
|
+
pm_call_node_t *call = callback_data->call;
|
20130
|
+
pm_constant_id_list_t *names = &callback_data->names;
|
19884
20131
|
|
19885
|
-
|
19886
|
-
|
20132
|
+
const uint8_t *source = pm_string_source(capture);
|
20133
|
+
size_t length = pm_string_length(capture);
|
19887
20134
|
|
19888
|
-
|
19889
|
-
|
19890
|
-
}
|
20135
|
+
pm_location_t location;
|
20136
|
+
pm_constant_id_t name;
|
19891
20137
|
|
19892
|
-
|
19893
|
-
|
19894
|
-
|
19895
|
-
if (pm_constant_id_list_includes(&names, name)) continue;
|
19896
|
-
pm_constant_id_list_append(&names, name);
|
20138
|
+
// If the name of the capture group isn't a valid identifier, we do
|
20139
|
+
// not add it to the local table.
|
20140
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) return;
|
19897
20141
|
|
19898
|
-
|
19899
|
-
|
19900
|
-
|
19901
|
-
|
19902
|
-
|
20142
|
+
if (callback_data->shared) {
|
20143
|
+
// If the unescaped string is a slice of the source, then we can
|
20144
|
+
// copy the names directly. The pointers will line up.
|
20145
|
+
location = (pm_location_t) { .start = source, .end = source + length };
|
20146
|
+
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
20147
|
+
} else {
|
20148
|
+
// Otherwise, the name is a slice of the malloc-ed owned string,
|
20149
|
+
// in which case we need to copy it out into a new string.
|
20150
|
+
location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
|
19903
20151
|
|
19904
|
-
|
19905
|
-
|
20152
|
+
void *memory = xmalloc(length);
|
20153
|
+
if (memory == NULL) abort();
|
19906
20154
|
|
19907
|
-
|
19908
|
-
|
19909
|
-
|
20155
|
+
memcpy(memory, source, length);
|
20156
|
+
name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
|
20157
|
+
}
|
19910
20158
|
|
19911
|
-
|
19912
|
-
|
19913
|
-
|
19914
|
-
|
19915
|
-
|
20159
|
+
// Add this name to the list of constants if it is valid, not duplicated,
|
20160
|
+
// and not a keyword.
|
20161
|
+
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
|
20162
|
+
pm_constant_id_list_append(names, name);
|
20163
|
+
|
20164
|
+
int depth;
|
20165
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20166
|
+
// If the name is not already a local but it is a keyword, then we
|
20167
|
+
// do not want to add a capture for this.
|
20168
|
+
if (pm_local_is_keyword((const char *) source, length)) return;
|
20169
|
+
|
20170
|
+
// If the identifier is not already a local, then we will add it to
|
20171
|
+
// the local table.
|
20172
|
+
pm_parser_local_add(parser, name, location.start, location.end, 0);
|
19916
20173
|
}
|
19917
20174
|
|
19918
|
-
|
19919
|
-
|
19920
|
-
|
19921
|
-
|
20175
|
+
// Here we lazily create the MatchWriteNode since we know we're
|
20176
|
+
// about to add a target.
|
20177
|
+
if (callback_data->match == NULL) {
|
20178
|
+
callback_data->match = pm_match_write_node_create(parser, call);
|
19922
20179
|
}
|
19923
20180
|
|
19924
|
-
|
19925
|
-
|
19926
|
-
|
20181
|
+
// Next, create the local variable target and add it to the list of
|
20182
|
+
// targets for the match.
|
20183
|
+
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20184
|
+
pm_node_list_append(&callback_data->match->targets, target);
|
19927
20185
|
}
|
20186
|
+
}
|
19928
20187
|
|
19929
|
-
|
19930
|
-
|
20188
|
+
/**
|
20189
|
+
* Potentially change a =~ with a regular expression with named captures into a
|
20190
|
+
* match write node.
|
20191
|
+
*/
|
20192
|
+
static pm_node_t *
|
20193
|
+
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
20194
|
+
parse_regular_expression_named_capture_data_t callback_data = {
|
20195
|
+
.parser = parser,
|
20196
|
+
.call = call,
|
20197
|
+
.names = { 0 },
|
20198
|
+
.shared = content->type == PM_STRING_SHARED
|
20199
|
+
};
|
20200
|
+
|
20201
|
+
parse_regular_expression_error_data_t error_data = {
|
20202
|
+
.parser = parser,
|
20203
|
+
.start = call->receiver->location.start,
|
20204
|
+
.end = call->receiver->location.end,
|
20205
|
+
.shared = content->type == PM_STRING_SHARED
|
20206
|
+
};
|
20207
|
+
|
20208
|
+
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
20209
|
+
pm_constant_id_list_free(&callback_data.names);
|
20210
|
+
|
20211
|
+
if (callback_data.match != NULL) {
|
20212
|
+
return (pm_node_t *) callback_data.match;
|
20213
|
+
} else {
|
20214
|
+
return (pm_node_t *) call;
|
20215
|
+
}
|
19931
20216
|
}
|
19932
20217
|
|
19933
20218
|
static inline pm_node_t *
|
@@ -20044,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20044
20329
|
return result;
|
20045
20330
|
}
|
20046
20331
|
case PM_CALL_NODE: {
|
20047
|
-
parser_lex(parser);
|
20048
20332
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20049
20333
|
|
20050
20334
|
// If we have a vcall (a method with no arguments and no
|
@@ -20055,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20055
20339
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20056
20340
|
|
20057
20341
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20342
|
+
parser_lex(parser);
|
20343
|
+
|
20058
20344
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
20059
20345
|
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20060
20346
|
|
@@ -20062,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20062
20348
|
return result;
|
20063
20349
|
}
|
20064
20350
|
|
20351
|
+
// Move past the token here so that we have already added
|
20352
|
+
// the local variable by this point.
|
20353
|
+
parser_lex(parser);
|
20354
|
+
|
20065
20355
|
// If there is no call operator and the message is "[]" then
|
20066
20356
|
// this is an aref expression, and we can transform it into
|
20067
20357
|
// an aset expression.
|
@@ -20157,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20157
20447
|
return result;
|
20158
20448
|
}
|
20159
20449
|
case PM_CALL_NODE: {
|
20160
|
-
parser_lex(parser);
|
20161
20450
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20162
20451
|
|
20163
20452
|
// If we have a vcall (a method with no arguments and no
|
@@ -20168,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20168
20457
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20169
20458
|
|
20170
20459
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20460
|
+
parser_lex(parser);
|
20461
|
+
|
20171
20462
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
20172
20463
|
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20173
20464
|
|
@@ -20175,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20175
20466
|
return result;
|
20176
20467
|
}
|
20177
20468
|
|
20469
|
+
// Move past the token here so that we have already added
|
20470
|
+
// the local variable by this point.
|
20471
|
+
parser_lex(parser);
|
20472
|
+
|
20178
20473
|
// If there is no call operator and the message is "[]" then
|
20179
20474
|
// this is an aref expression, and we can transform it into
|
20180
20475
|
// an aset expression.
|
@@ -20584,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20584
20879
|
|
20585
20880
|
if (
|
20586
20881
|
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
|
20587
|
-
(token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
|
20882
|
+
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
|
20588
20883
|
) {
|
20589
20884
|
// If we have a constant immediately following a '::' operator, then
|
20590
20885
|
// this can either be a constant path or a method call, depending on
|
@@ -21127,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
21127
21422
|
|
21128
21423
|
// Scopes given from the outside are not allowed to have numbered
|
21129
21424
|
// parameters.
|
21130
|
-
parser->current_scope->
|
21425
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
21131
21426
|
|
21132
21427
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
21133
21428
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -21515,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
21515
21810
|
}
|
21516
21811
|
|
21517
21812
|
#endif
|
21518
|
-
|
21519
|
-
/** An error that is going to be formatted into the output. */
|
21520
|
-
typedef struct {
|
21521
|
-
/** A pointer to the diagnostic that was generated during parsing. */
|
21522
|
-
pm_diagnostic_t *error;
|
21523
|
-
|
21524
|
-
/** The start line of the diagnostic message. */
|
21525
|
-
int32_t line;
|
21526
|
-
|
21527
|
-
/** The column start of the diagnostic message. */
|
21528
|
-
uint32_t column_start;
|
21529
|
-
|
21530
|
-
/** The column end of the diagnostic message. */
|
21531
|
-
uint32_t column_end;
|
21532
|
-
} pm_error_t;
|
21533
|
-
|
21534
|
-
/** The format that will be used to format the errors into the output. */
|
21535
|
-
typedef struct {
|
21536
|
-
/** The prefix that will be used for line numbers. */
|
21537
|
-
const char *number_prefix;
|
21538
|
-
|
21539
|
-
/** The prefix that will be used for blank lines. */
|
21540
|
-
const char *blank_prefix;
|
21541
|
-
|
21542
|
-
/** The divider that will be used between sections of source code. */
|
21543
|
-
const char *divider;
|
21544
|
-
|
21545
|
-
/** The length of the blank prefix. */
|
21546
|
-
size_t blank_prefix_length;
|
21547
|
-
|
21548
|
-
/** The length of the divider. */
|
21549
|
-
size_t divider_length;
|
21550
|
-
} pm_error_format_t;
|
21551
|
-
|
21552
|
-
#define PM_COLOR_GRAY "\033[38;5;102m"
|
21553
|
-
#define PM_COLOR_RED "\033[1;31m"
|
21554
|
-
#define PM_COLOR_RESET "\033[m"
|
21555
|
-
|
21556
|
-
static inline pm_error_t *
|
21557
|
-
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
21558
|
-
pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
|
21559
|
-
if (errors == NULL) return NULL;
|
21560
|
-
|
21561
|
-
int32_t start_line = parser->start_line;
|
21562
|
-
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
21563
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
21564
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
21565
|
-
|
21566
|
-
// We're going to insert this error into the array in sorted order. We
|
21567
|
-
// do this by finding the first error that has a line number greater
|
21568
|
-
// than the current error and then inserting the current error before
|
21569
|
-
// that one.
|
21570
|
-
size_t index = 0;
|
21571
|
-
while (
|
21572
|
-
(index < error_list->size) &&
|
21573
|
-
(errors[index].error != NULL) &&
|
21574
|
-
(
|
21575
|
-
(errors[index].line < start.line) ||
|
21576
|
-
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
21577
|
-
)
|
21578
|
-
) index++;
|
21579
|
-
|
21580
|
-
// Now we're going to shift all of the errors after this one down one
|
21581
|
-
// index to make room for the new error.
|
21582
|
-
if (index + 1 < error_list->size) {
|
21583
|
-
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
21584
|
-
}
|
21585
|
-
|
21586
|
-
// Finally, we'll insert the error into the array.
|
21587
|
-
uint32_t column_end;
|
21588
|
-
if (start.line == end.line) {
|
21589
|
-
column_end = end.column;
|
21590
|
-
} else {
|
21591
|
-
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
21592
|
-
}
|
21593
|
-
|
21594
|
-
// Ensure we have at least one column of error.
|
21595
|
-
if (start.column == column_end) column_end++;
|
21596
|
-
|
21597
|
-
errors[index] = (pm_error_t) {
|
21598
|
-
.error = error,
|
21599
|
-
.line = start.line,
|
21600
|
-
.column_start = start.column,
|
21601
|
-
.column_end = column_end
|
21602
|
-
};
|
21603
|
-
}
|
21604
|
-
|
21605
|
-
return errors;
|
21606
|
-
}
|
21607
|
-
|
21608
|
-
static inline void
|
21609
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
21610
|
-
int32_t line_delta = line - parser->start_line;
|
21611
|
-
assert(line_delta >= 0);
|
21612
|
-
|
21613
|
-
size_t index = (size_t) line_delta;
|
21614
|
-
assert(index < newline_list->size);
|
21615
|
-
|
21616
|
-
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
21617
|
-
const uint8_t *end;
|
21618
|
-
|
21619
|
-
if (index >= newline_list->size - 1) {
|
21620
|
-
end = parser->end;
|
21621
|
-
} else {
|
21622
|
-
end = &parser->start[newline_list->offsets[index + 1]];
|
21623
|
-
}
|
21624
|
-
|
21625
|
-
pm_buffer_append_format(buffer, number_prefix, line);
|
21626
|
-
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
21627
|
-
|
21628
|
-
if (end == parser->end && end[-1] != '\n') {
|
21629
|
-
pm_buffer_append_string(buffer, "\n", 1);
|
21630
|
-
}
|
21631
|
-
}
|
21632
|
-
|
21633
|
-
/**
|
21634
|
-
* Format the errors on the parser into the given buffer.
|
21635
|
-
*/
|
21636
|
-
PRISM_EXPORTED_FUNCTION void
|
21637
|
-
pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
|
21638
|
-
assert(error_list->size != 0);
|
21639
|
-
|
21640
|
-
// First, we're going to sort all of the errors by line number using an
|
21641
|
-
// insertion sort into a newly allocated array.
|
21642
|
-
const int32_t start_line = parser->start_line;
|
21643
|
-
const pm_newline_list_t *newline_list = &parser->newline_list;
|
21644
|
-
|
21645
|
-
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
21646
|
-
if (errors == NULL) return;
|
21647
|
-
|
21648
|
-
// Now we're going to determine how we're going to format line numbers and
|
21649
|
-
// blank lines based on the maximum number of digits in the line numbers
|
21650
|
-
// that are going to be displaid.
|
21651
|
-
pm_error_format_t error_format;
|
21652
|
-
int32_t first_line_number = errors[0].line;
|
21653
|
-
int32_t last_line_number = errors[error_list->size - 1].line;
|
21654
|
-
|
21655
|
-
// If we have a maximum line number that is negative, then we're going to
|
21656
|
-
// use the absolute value for comparison but multiple by 10 to additionally
|
21657
|
-
// have a column for the negative sign.
|
21658
|
-
if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
|
21659
|
-
if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
|
21660
|
-
int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
|
21661
|
-
|
21662
|
-
if (max_line_number < 10) {
|
21663
|
-
if (colorize) {
|
21664
|
-
error_format = (pm_error_format_t) {
|
21665
|
-
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
21666
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21667
|
-
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
21668
|
-
};
|
21669
|
-
} else {
|
21670
|
-
error_format = (pm_error_format_t) {
|
21671
|
-
.number_prefix = "%1" PRIi32 " | ",
|
21672
|
-
.blank_prefix = " | ",
|
21673
|
-
.divider = " ~~~~~\n"
|
21674
|
-
};
|
21675
|
-
}
|
21676
|
-
} else if (max_line_number < 100) {
|
21677
|
-
if (colorize) {
|
21678
|
-
error_format = (pm_error_format_t) {
|
21679
|
-
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
21680
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21681
|
-
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
21682
|
-
};
|
21683
|
-
} else {
|
21684
|
-
error_format = (pm_error_format_t) {
|
21685
|
-
.number_prefix = "%2" PRIi32 " | ",
|
21686
|
-
.blank_prefix = " | ",
|
21687
|
-
.divider = " ~~~~~~\n"
|
21688
|
-
};
|
21689
|
-
}
|
21690
|
-
} else if (max_line_number < 1000) {
|
21691
|
-
if (colorize) {
|
21692
|
-
error_format = (pm_error_format_t) {
|
21693
|
-
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
21694
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21695
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
21696
|
-
};
|
21697
|
-
} else {
|
21698
|
-
error_format = (pm_error_format_t) {
|
21699
|
-
.number_prefix = "%3" PRIi32 " | ",
|
21700
|
-
.blank_prefix = " | ",
|
21701
|
-
.divider = " ~~~~~~~\n"
|
21702
|
-
};
|
21703
|
-
}
|
21704
|
-
} else if (max_line_number < 10000) {
|
21705
|
-
if (colorize) {
|
21706
|
-
error_format = (pm_error_format_t) {
|
21707
|
-
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
21708
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21709
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21710
|
-
};
|
21711
|
-
} else {
|
21712
|
-
error_format = (pm_error_format_t) {
|
21713
|
-
.number_prefix = "%4" PRIi32 " | ",
|
21714
|
-
.blank_prefix = " | ",
|
21715
|
-
.divider = " ~~~~~~~~\n"
|
21716
|
-
};
|
21717
|
-
}
|
21718
|
-
} else {
|
21719
|
-
if (colorize) {
|
21720
|
-
error_format = (pm_error_format_t) {
|
21721
|
-
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
21722
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21723
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21724
|
-
};
|
21725
|
-
} else {
|
21726
|
-
error_format = (pm_error_format_t) {
|
21727
|
-
.number_prefix = "%5" PRIi32 " | ",
|
21728
|
-
.blank_prefix = " | ",
|
21729
|
-
.divider = " ~~~~~~~~\n"
|
21730
|
-
};
|
21731
|
-
}
|
21732
|
-
}
|
21733
|
-
|
21734
|
-
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
21735
|
-
error_format.divider_length = strlen(error_format.divider);
|
21736
|
-
|
21737
|
-
// Now we're going to iterate through every error in our error list and
|
21738
|
-
// display it. While we're iterating, we will display some padding lines of
|
21739
|
-
// the source before the error to give some context. We'll be careful not to
|
21740
|
-
// display the same line twice in case the errors are close enough in the
|
21741
|
-
// source.
|
21742
|
-
int32_t last_line = parser->start_line - 1;
|
21743
|
-
const pm_encoding_t *encoding = parser->encoding;
|
21744
|
-
|
21745
|
-
for (size_t index = 0; index < error_list->size; index++) {
|
21746
|
-
pm_error_t *error = &errors[index];
|
21747
|
-
|
21748
|
-
// Here we determine how many lines of padding of the source to display,
|
21749
|
-
// based on the difference from the last line that was displaid.
|
21750
|
-
if (error->line - last_line > 1) {
|
21751
|
-
if (error->line - last_line > 2) {
|
21752
|
-
if ((index != 0) && (error->line - last_line > 3)) {
|
21753
|
-
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
21754
|
-
}
|
21755
|
-
|
21756
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21757
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
21758
|
-
}
|
21759
|
-
|
21760
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21761
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
21762
|
-
}
|
21763
|
-
|
21764
|
-
// If this is the first error or we're on a new line, then we'll display
|
21765
|
-
// the line that has the error in it.
|
21766
|
-
if ((index == 0) || (error->line != last_line)) {
|
21767
|
-
if (colorize) {
|
21768
|
-
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
|
21769
|
-
} else {
|
21770
|
-
pm_buffer_append_string(buffer, "> ", 2);
|
21771
|
-
}
|
21772
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
21773
|
-
}
|
21774
|
-
|
21775
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
21776
|
-
if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
|
21777
|
-
|
21778
|
-
// Now we'll display the actual error message. We'll do this by first
|
21779
|
-
// putting the prefix to the line, then a bunch of blank spaces
|
21780
|
-
// depending on the column, then as many carets as we need to display
|
21781
|
-
// the width of the error, then the error message itself.
|
21782
|
-
//
|
21783
|
-
// Note that this doesn't take into account the width of the actual
|
21784
|
-
// character when displaid in the terminal. For some east-asian
|
21785
|
-
// languages or emoji, this means it can be thrown off pretty badly. We
|
21786
|
-
// will need to solve this eventually.
|
21787
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21788
|
-
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21789
|
-
|
21790
|
-
size_t column = 0;
|
21791
|
-
while (column < error->column_start) {
|
21792
|
-
pm_buffer_append_byte(buffer, ' ');
|
21793
|
-
|
21794
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21795
|
-
column += (char_width == 0 ? 1 : char_width);
|
21796
|
-
}
|
21797
|
-
|
21798
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21799
|
-
pm_buffer_append_byte(buffer, '^');
|
21800
|
-
|
21801
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21802
|
-
column += (char_width == 0 ? 1 : char_width);
|
21803
|
-
|
21804
|
-
while (column < error->column_end) {
|
21805
|
-
pm_buffer_append_byte(buffer, '~');
|
21806
|
-
|
21807
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21808
|
-
column += (char_width == 0 ? 1 : char_width);
|
21809
|
-
}
|
21810
|
-
|
21811
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21812
|
-
|
21813
|
-
if (inline_messages) {
|
21814
|
-
pm_buffer_append_byte(buffer, ' ');
|
21815
|
-
assert(error->error != NULL);
|
21816
|
-
|
21817
|
-
const char *message = error->error->message;
|
21818
|
-
pm_buffer_append_string(buffer, message, strlen(message));
|
21819
|
-
}
|
21820
|
-
|
21821
|
-
pm_buffer_append_byte(buffer, '\n');
|
21822
|
-
|
21823
|
-
// Here we determine how many lines of padding to display after the
|
21824
|
-
// error, depending on where the next error is in source.
|
21825
|
-
last_line = error->line;
|
21826
|
-
int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
|
21827
|
-
|
21828
|
-
if (next_line - last_line > 1) {
|
21829
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21830
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21831
|
-
}
|
21832
|
-
|
21833
|
-
if (next_line - last_line > 1) {
|
21834
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21835
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21836
|
-
}
|
21837
|
-
}
|
21838
|
-
|
21839
|
-
// Finally, we'll free the array of errors that we allocated.
|
21840
|
-
xfree(errors);
|
21841
|
-
}
|
21842
|
-
|
21843
|
-
// Remove the PM_COLOR_* macro definitions so they are not visible past this point.
#undef PM_COLOR_GRAY
|
21844
|
-
#undef PM_COLOR_RED
|
21845
|
-
#undef PM_COLOR_RESET
|