prism 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +1 -0
- data/config.yml +66 -9
- data/docs/fuzzing.md +1 -1
- data/docs/ripper_translation.md +22 -0
- data/ext/prism/api_node.c +30 -12
- data/ext/prism/extension.c +107 -372
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +138 -70
- data/include/prism/diagnostic.h +7 -2
- data/include/prism/node.h +0 -21
- data/include/prism/parser.h +23 -25
- data/include/prism/regexp.h +17 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +16 -9
- data/include/prism/util/pm_string.h +0 -8
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/dispatcher.rb +14 -0
- data/lib/prism/dot_visitor.rb +22 -3
- data/lib/prism/dsl.rb +7 -2
- data/lib/prism/ffi.rb +24 -3
- data/lib/prism/inspect_visitor.rb +10 -8
- data/lib/prism/mutation_compiler.rb +6 -1
- data/lib/prism/node.rb +166 -241
- data/lib/prism/node_ext.rb +21 -5
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +17 -0
- data/lib/prism/reflection.rb +3 -1
- data/lib/prism/serialize.rb +80 -67
- data/lib/prism/translation/parser/compiler.rb +134 -114
- data/lib/prism/translation/parser.rb +6 -1
- data/lib/prism/translation/ripper.rb +8 -6
- data/lib/prism/translation/ruby_parser.rb +23 -5
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +1 -4
- data/rbi/prism/node.rbi +63 -6
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +6 -0
- data/sig/prism/dsl.rbs +4 -1
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +28 -4
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +21 -0
- data/src/diagnostic.c +27 -17
- data/src/node.c +408 -1666
- data/src/prettyprint.c +49 -6
- data/src/prism.c +958 -991
- data/src/regexp.c +133 -68
- data/src/serialize.c +6 -1
- data/src/static_literals.c +63 -84
- data/src/token_type.c +2 -2
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +39 -11
- data/src/util/pm_string.c +0 -12
- data/src/util/pm_strpbrk.c +32 -6
- metadata +2 -5
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/src/util/pm_string_list.c +0 -28
data/src/prism.c
CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
|
|
423
423
|
* This is the equivalent of IS_lex_state in CRuby.
|
424
424
|
*/
|
425
425
|
static inline bool
|
426
|
-
lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
|
426
|
+
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
|
427
427
|
return parser->lex_state & state;
|
428
428
|
}
|
429
429
|
|
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
708
708
|
.previous = parser->current_scope,
|
709
709
|
.locals = { 0 },
|
710
710
|
.parameters = PM_SCOPE_PARAMETERS_NONE,
|
711
|
-
.
|
711
|
+
.implicit_parameters = { 0 },
|
712
712
|
.shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
|
713
713
|
.closed = closed
|
714
714
|
};
|
@@ -1183,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
|
|
1183
1183
|
return NULL;
|
1184
1184
|
case PM_BEGIN_NODE: {
|
1185
1185
|
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
1186
|
+
|
1187
|
+
if (cast->statements == NULL && cast->ensure_clause != NULL) {
|
1188
|
+
node = (pm_node_t *) cast->ensure_clause;
|
1189
|
+
}
|
1190
|
+
else {
|
1191
|
+
if (cast->rescue_clause != NULL) {
|
1192
|
+
if (cast->rescue_clause->statements == NULL) {
|
1193
|
+
return NULL;
|
1194
|
+
}
|
1195
|
+
else if (cast->else_clause != NULL) {
|
1196
|
+
node = (pm_node_t *) cast->else_clause;
|
1197
|
+
}
|
1198
|
+
else {
|
1199
|
+
node = (pm_node_t *) cast->statements;
|
1200
|
+
}
|
1201
|
+
}
|
1202
|
+
else {
|
1203
|
+
node = (pm_node_t *) cast->statements;
|
1204
|
+
}
|
1205
|
+
}
|
1206
|
+
|
1207
|
+
break;
|
1208
|
+
}
|
1209
|
+
case PM_ENSURE_NODE: {
|
1210
|
+
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
|
1186
1211
|
node = (pm_node_t *) cast->statements;
|
1187
1212
|
break;
|
1188
1213
|
}
|
@@ -1630,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
|
|
1630
1655
|
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
|
1631
1656
|
}
|
1632
1657
|
|
1633
|
-
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
|
1658
|
+
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
|
1634
1659
|
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
|
1635
1660
|
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
|
1636
1661
|
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
|
@@ -2827,8 +2852,7 @@ static pm_call_node_t *
|
|
2827
2852
|
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
|
2828
2853
|
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
2829
2854
|
|
2830
|
-
node->base.location
|
2831
|
-
node->base.location.end = parser->start;
|
2855
|
+
node->base.location = PM_LOCATION_NULL_VALUE(parser);
|
2832
2856
|
node->arguments = arguments;
|
2833
2857
|
|
2834
2858
|
node->name = name;
|
@@ -4291,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4291
4315
|
}
|
4292
4316
|
|
4293
4317
|
/**
|
4294
|
-
* Allocate and initialize a new
|
4318
|
+
* Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
|
4295
4319
|
*/
|
4296
4320
|
static pm_rational_node_t *
|
4297
4321
|
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
@@ -4301,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4301
4325
|
*node = (pm_rational_node_t) {
|
4302
4326
|
{
|
4303
4327
|
.type = PM_RATIONAL_NODE,
|
4304
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
4328
|
+
.flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
|
4305
4329
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4306
4330
|
},
|
4307
|
-
.
|
4308
|
-
|
4309
|
-
.start = token->start,
|
4310
|
-
.end = token->end - 1
|
4311
|
-
}))
|
4331
|
+
.numerator = { 0 },
|
4332
|
+
.denominator = { 0 }
|
4312
4333
|
};
|
4313
4334
|
|
4335
|
+
const uint8_t *start = token->start;
|
4336
|
+
const uint8_t *end = token->end - 1; // r
|
4337
|
+
|
4338
|
+
while (start < end && *start == '0') start++; // 0.1 -> .1
|
4339
|
+
while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
|
4340
|
+
|
4341
|
+
size_t length = (size_t) (end - start);
|
4342
|
+
if (length == 1) {
|
4343
|
+
node->denominator.value = 1;
|
4344
|
+
return node;
|
4345
|
+
}
|
4346
|
+
|
4347
|
+
const uint8_t *point = memchr(start, '.', length);
|
4348
|
+
assert(point && "should have a decimal point");
|
4349
|
+
|
4350
|
+
uint8_t *digits = malloc(length);
|
4351
|
+
if (digits == NULL) {
|
4352
|
+
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
|
4353
|
+
abort();
|
4354
|
+
}
|
4355
|
+
|
4356
|
+
memcpy(digits, start, (unsigned long) (point - start));
|
4357
|
+
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
|
4358
|
+
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
|
4359
|
+
|
4360
|
+
digits[0] = '1';
|
4361
|
+
if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
|
4362
|
+
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
|
4363
|
+
free(digits);
|
4364
|
+
|
4365
|
+
pm_integers_reduce(&node->numerator, &node->denominator);
|
4314
4366
|
return node;
|
4315
4367
|
}
|
4316
4368
|
|
@@ -4621,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
|
|
4621
4673
|
*node = (pm_global_variable_read_node_t) {
|
4622
4674
|
{
|
4623
4675
|
.type = PM_GLOBAL_VARIABLE_READ_NODE,
|
4624
|
-
.location =
|
4676
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4625
4677
|
},
|
4626
4678
|
.name = name
|
4627
4679
|
};
|
@@ -4663,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
|
|
4663
4715
|
*node = (pm_global_variable_write_node_t) {
|
4664
4716
|
{
|
4665
4717
|
.type = PM_GLOBAL_VARIABLE_WRITE_NODE,
|
4666
|
-
.location =
|
4718
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
4667
4719
|
},
|
4668
4720
|
.name = name,
|
4669
|
-
.name_loc =
|
4670
|
-
.operator_loc =
|
4721
|
+
.name_loc = PM_LOCATION_NULL_VALUE(parser),
|
4722
|
+
.operator_loc = PM_LOCATION_NULL_VALUE(parser),
|
4671
4723
|
.value = value
|
4672
4724
|
};
|
4673
4725
|
|
@@ -4944,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
|
|
4944
4996
|
}
|
4945
4997
|
|
4946
4998
|
/**
|
4947
|
-
* Allocate and initialize a new
|
4999
|
+
* Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
|
4948
5000
|
* token.
|
4949
5001
|
*/
|
4950
5002
|
static pm_rational_node_t *
|
@@ -4955,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
|
|
4955
5007
|
*node = (pm_rational_node_t) {
|
4956
5008
|
{
|
4957
5009
|
.type = PM_RATIONAL_NODE,
|
4958
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5010
|
+
.flags = base | PM_NODE_FLAG_STATIC_LITERAL,
|
4959
5011
|
.location = PM_LOCATION_TOKEN_VALUE(token)
|
4960
5012
|
},
|
4961
|
-
.
|
4962
|
-
|
4963
|
-
.start = token->start,
|
4964
|
-
.end = token->end - 1
|
4965
|
-
}))
|
5013
|
+
.numerator = { 0 },
|
5014
|
+
.denominator = { .value = 1, 0 }
|
4966
5015
|
};
|
4967
5016
|
|
5017
|
+
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
|
5018
|
+
switch (base) {
|
5019
|
+
case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
|
5020
|
+
case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
|
5021
|
+
case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
|
5022
|
+
case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
|
5023
|
+
default: assert(false && "unreachable"); break;
|
5024
|
+
}
|
5025
|
+
|
5026
|
+
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
|
5027
|
+
|
4968
5028
|
return node;
|
4969
5029
|
}
|
4970
5030
|
|
@@ -5462,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
|
|
5462
5522
|
node->base.location.end = closing->end;
|
5463
5523
|
}
|
5464
5524
|
|
5525
|
+
/**
|
5526
|
+
* Create a local variable read that is reading the implicit 'it' variable.
|
5527
|
+
*/
|
5528
|
+
static pm_it_local_variable_read_node_t *
|
5529
|
+
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
|
5530
|
+
pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
|
5531
|
+
|
5532
|
+
*node = (pm_it_local_variable_read_node_t) {
|
5533
|
+
{
|
5534
|
+
.type = PM_IT_LOCAL_VARIABLE_READ_NODE,
|
5535
|
+
.location = PM_LOCATION_TOKEN_VALUE(name)
|
5536
|
+
}
|
5537
|
+
};
|
5538
|
+
|
5539
|
+
return node;
|
5540
|
+
}
|
5541
|
+
|
5465
5542
|
/**
|
5466
5543
|
* Allocate and initialize a new ItParametersNode node.
|
5467
5544
|
*/
|
@@ -5774,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
|
5774
5851
|
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
5775
5852
|
}
|
5776
5853
|
|
5777
|
-
/**
|
5778
|
-
* Returns true if the given node is `it` default parameter.
|
5779
|
-
*/
|
5780
|
-
static inline bool
|
5781
|
-
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
5782
|
-
// Check if it's a local variable reference
|
5783
|
-
if (node->type != PM_CALL_NODE) {
|
5784
|
-
return false;
|
5785
|
-
}
|
5786
|
-
|
5787
|
-
// Check if it's a variable call
|
5788
|
-
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
5789
|
-
if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
|
5790
|
-
return false;
|
5791
|
-
}
|
5792
|
-
|
5793
|
-
// Check if it's called `it`
|
5794
|
-
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
5795
|
-
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
5796
|
-
return pm_token_is_it(constant->start, constant->start + constant->length);
|
5797
|
-
}
|
5798
|
-
|
5799
5854
|
/**
|
5800
5855
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
5801
5856
|
* are of the form /^_\d$/).
|
@@ -7355,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
|
|
7355
7410
|
{
|
7356
7411
|
.type = PM_SYMBOL_NODE,
|
7357
7412
|
.flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
|
7358
|
-
.location =
|
7413
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7359
7414
|
},
|
7360
|
-
.value_loc =
|
7415
|
+
.value_loc = PM_LOCATION_NULL_VALUE(parser),
|
7361
7416
|
.unescaped = { 0 }
|
7362
7417
|
};
|
7363
7418
|
|
@@ -7758,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7758
7813
|
*node = (pm_while_node_t) {
|
7759
7814
|
{
|
7760
7815
|
.type = PM_WHILE_NODE,
|
7761
|
-
.location =
|
7816
|
+
.location = PM_LOCATION_NULL_VALUE(parser)
|
7762
7817
|
},
|
7763
|
-
.keyword_loc =
|
7764
|
-
.closing_loc =
|
7818
|
+
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7819
|
+
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7765
7820
|
.predicate = predicate,
|
7766
7821
|
.statements = statements
|
7767
7822
|
};
|
@@ -7916,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
|
|
7916
7971
|
return constant_id;
|
7917
7972
|
}
|
7918
7973
|
|
7919
|
-
/**
|
7920
|
-
* Create a local variable read that is reading the implicit 'it' variable.
|
7921
|
-
*/
|
7922
|
-
static pm_local_variable_read_node_t *
|
7923
|
-
pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
|
7924
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
|
7925
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
7926
|
-
return NULL;
|
7927
|
-
}
|
7928
|
-
|
7929
|
-
if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
|
7930
|
-
pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
7931
|
-
return NULL;
|
7932
|
-
}
|
7933
|
-
|
7934
|
-
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
|
7935
|
-
|
7936
|
-
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
7937
|
-
pm_parser_local_add(parser, name_id, name->start, name->end, 0);
|
7938
|
-
|
7939
|
-
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
|
7940
|
-
}
|
7941
|
-
|
7942
|
-
/**
|
7943
|
-
* Convert a `it` variable call node to a node for `it` default parameter.
|
7944
|
-
*/
|
7945
|
-
static pm_node_t *
|
7946
|
-
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7947
|
-
if (
|
7948
|
-
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7949
|
-
!parser->current_scope->closed &&
|
7950
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7951
|
-
pm_node_is_it(parser, node)
|
7952
|
-
) {
|
7953
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
7954
|
-
|
7955
|
-
if (read != NULL) {
|
7956
|
-
pm_node_destroy(parser, node);
|
7957
|
-
node = (pm_node_t *) read;
|
7958
|
-
}
|
7959
|
-
}
|
7960
|
-
|
7961
|
-
return node;
|
7962
|
-
}
|
7963
|
-
|
7964
7974
|
/**
|
7965
7975
|
* Add a parameter name to the current scope and check whether the name of the
|
7966
7976
|
* parameter is unique or not.
|
@@ -7996,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
7996
8006
|
pm_scope_t *scope = parser->current_scope;
|
7997
8007
|
parser->current_scope = scope->previous;
|
7998
8008
|
pm_locals_free(&scope->locals);
|
8009
|
+
pm_node_list_free(&scope->implicit_parameters);
|
7999
8010
|
xfree(scope);
|
8000
8011
|
}
|
8001
8012
|
|
@@ -8067,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
|
|
8067
8078
|
* is beyond the end of the source then return '\0'.
|
8068
8079
|
*/
|
8069
8080
|
static inline uint8_t
|
8070
|
-
peek_at(pm_parser_t *parser, const uint8_t *cursor) {
|
8081
|
+
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
|
8071
8082
|
if (cursor < parser->end) {
|
8072
8083
|
return *cursor;
|
8073
8084
|
} else {
|
@@ -8090,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
|
|
8090
8101
|
* that position is beyond the end of the source then return '\0'.
|
8091
8102
|
*/
|
8092
8103
|
static inline uint8_t
|
8093
|
-
peek(pm_parser_t *parser) {
|
8104
|
+
peek(const pm_parser_t *parser) {
|
8094
8105
|
return peek_at(parser, parser->current.end);
|
8095
8106
|
}
|
8096
8107
|
|
@@ -8155,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
|
8155
8166
|
return memchr(cursor, '\n', (size_t) length);
|
8156
8167
|
}
|
8157
8168
|
|
8169
|
+
/**
|
8170
|
+
* This is equivalent to the predicate of warn_balanced in CRuby.
|
8171
|
+
*/
|
8172
|
+
static inline bool
|
8173
|
+
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
|
8174
|
+
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
|
8175
|
+
}
|
8176
|
+
|
8158
8177
|
/**
|
8159
8178
|
* Here we're going to check if this is a "magic" comment, and perform whatever
|
8160
8179
|
* actions are necessary for it here.
|
@@ -8995,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8995
9014
|
// If we get here, then we have a $ followed by something that
|
8996
9015
|
// isn't recognized as a global variable.
|
8997
9016
|
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8998
|
-
|
8999
|
-
|
9017
|
+
const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9018
|
+
PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
|
9000
9019
|
}
|
9001
9020
|
|
9002
9021
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -9389,7 +9408,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
|
9389
9408
|
*/
|
9390
9409
|
static inline uint8_t
|
9391
9410
|
escape_byte(uint8_t value, const uint8_t flags) {
|
9392
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) value &=
|
9411
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
|
9393
9412
|
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
|
9394
9413
|
return value;
|
9395
9414
|
}
|
@@ -9489,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
|
9489
9508
|
static inline void
|
9490
9509
|
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
|
9491
9510
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9492
|
-
|
9493
|
-
|
9494
|
-
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
|
9495
|
-
uint8_t byte2 = (uint8_t) (byte & 0xF);
|
9496
|
-
|
9497
|
-
if (byte1 >= 0xA) {
|
9498
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
|
9499
|
-
} else {
|
9500
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
|
9501
|
-
}
|
9502
|
-
|
9503
|
-
if (byte2 >= 0xA) {
|
9504
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
|
9505
|
-
} else {
|
9506
|
-
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
|
9507
|
-
}
|
9511
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
|
9508
9512
|
}
|
9509
9513
|
|
9510
9514
|
escape_write_byte_encoded(parser, buffer, byte);
|
@@ -9539,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9539
9543
|
switch (peek(parser)) {
|
9540
9544
|
case '\\': {
|
9541
9545
|
parser->current.end++;
|
9542
|
-
|
9546
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
9543
9547
|
return;
|
9544
9548
|
}
|
9545
9549
|
case '\'': {
|
9546
9550
|
parser->current.end++;
|
9547
|
-
|
9551
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
|
9548
9552
|
return;
|
9549
9553
|
}
|
9550
9554
|
case 'a': {
|
9551
9555
|
parser->current.end++;
|
9552
|
-
|
9556
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
|
9553
9557
|
return;
|
9554
9558
|
}
|
9555
9559
|
case 'b': {
|
9556
9560
|
parser->current.end++;
|
9557
|
-
|
9561
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
|
9558
9562
|
return;
|
9559
9563
|
}
|
9560
9564
|
case 'e': {
|
9561
9565
|
parser->current.end++;
|
9562
|
-
|
9566
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
|
9563
9567
|
return;
|
9564
9568
|
}
|
9565
9569
|
case 'f': {
|
9566
9570
|
parser->current.end++;
|
9567
|
-
|
9571
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
|
9568
9572
|
return;
|
9569
9573
|
}
|
9570
9574
|
case 'n': {
|
9571
9575
|
parser->current.end++;
|
9572
|
-
|
9576
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
|
9573
9577
|
return;
|
9574
9578
|
}
|
9575
9579
|
case 'r': {
|
9576
9580
|
parser->current.end++;
|
9577
|
-
|
9581
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
|
9578
9582
|
return;
|
9579
9583
|
}
|
9580
9584
|
case 's': {
|
9581
9585
|
parser->current.end++;
|
9582
|
-
|
9586
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
|
9583
9587
|
return;
|
9584
9588
|
}
|
9585
9589
|
case 't': {
|
9586
9590
|
parser->current.end++;
|
9587
|
-
|
9591
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
|
9588
9592
|
return;
|
9589
9593
|
}
|
9590
9594
|
case 'v': {
|
9591
9595
|
parser->current.end++;
|
9592
|
-
|
9596
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
|
9593
9597
|
return;
|
9594
9598
|
}
|
9595
9599
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
|
@@ -9606,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9606
9610
|
}
|
9607
9611
|
}
|
9608
9612
|
|
9609
|
-
|
9613
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9610
9614
|
return;
|
9611
9615
|
}
|
9612
9616
|
case 'x': {
|
@@ -9625,11 +9629,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9625
9629
|
parser->current.end++;
|
9626
9630
|
}
|
9627
9631
|
|
9632
|
+
value = escape_byte(value, flags);
|
9628
9633
|
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9629
|
-
|
9634
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9635
|
+
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
|
9636
|
+
} else {
|
9637
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9638
|
+
}
|
9630
9639
|
}
|
9631
9640
|
|
9632
|
-
escape_write_byte_encoded(parser, buffer,
|
9641
|
+
escape_write_byte_encoded(parser, buffer, value);
|
9633
9642
|
} else {
|
9634
9643
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
9635
9644
|
}
|
@@ -9658,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9658
9667
|
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
9659
9668
|
} else if (hexadecimal_length == 0) {
|
9660
9669
|
// there are no hexadecimal characters
|
9661
|
-
pm_parser_err(parser,
|
9670
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9671
|
+
pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
9662
9672
|
return;
|
9663
9673
|
}
|
9664
9674
|
|
@@ -9707,10 +9717,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9707
9717
|
}
|
9708
9718
|
}
|
9709
9719
|
|
9710
|
-
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9711
|
-
pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9712
|
-
}
|
9713
|
-
|
9714
9720
|
return;
|
9715
9721
|
}
|
9716
9722
|
case 'c': {
|
@@ -9733,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9733
9739
|
return;
|
9734
9740
|
}
|
9735
9741
|
parser->current.end++;
|
9742
|
+
|
9743
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9744
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9745
|
+
return;
|
9746
|
+
}
|
9747
|
+
|
9736
9748
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9737
9749
|
return;
|
9738
9750
|
case ' ':
|
@@ -9760,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9760
9772
|
case 'C': {
|
9761
9773
|
parser->current.end++;
|
9762
9774
|
if (peek(parser) != '-') {
|
9763
|
-
|
9775
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9776
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9764
9777
|
return;
|
9765
9778
|
}
|
9766
9779
|
|
@@ -9783,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9783
9796
|
return;
|
9784
9797
|
}
|
9785
9798
|
parser->current.end++;
|
9799
|
+
|
9800
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9801
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9802
|
+
return;
|
9803
|
+
}
|
9804
|
+
|
9786
9805
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
|
9787
9806
|
return;
|
9788
9807
|
case ' ':
|
@@ -9797,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9797
9816
|
return;
|
9798
9817
|
default: {
|
9799
9818
|
if (!char_is_ascii_printable(peeked)) {
|
9800
|
-
|
9819
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9820
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9801
9821
|
return;
|
9802
9822
|
}
|
9803
9823
|
|
@@ -9810,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9810
9830
|
case 'M': {
|
9811
9831
|
parser->current.end++;
|
9812
9832
|
if (peek(parser) != '-') {
|
9813
|
-
|
9833
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9834
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9814
9835
|
return;
|
9815
9836
|
}
|
9816
9837
|
|
@@ -9828,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9828
9849
|
return;
|
9829
9850
|
}
|
9830
9851
|
parser->current.end++;
|
9852
|
+
|
9853
|
+
if (match(parser, 'u') || match(parser, 'U')) {
|
9854
|
+
pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9855
|
+
return;
|
9856
|
+
}
|
9857
|
+
|
9831
9858
|
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
|
9832
9859
|
return;
|
9833
9860
|
case ' ':
|
@@ -9842,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9842
9869
|
return;
|
9843
9870
|
default:
|
9844
9871
|
if (!char_is_ascii_printable(peeked)) {
|
9845
|
-
|
9872
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9873
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
9846
9874
|
return;
|
9847
9875
|
}
|
9848
9876
|
|
@@ -10803,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10803
10831
|
type = PM_TOKEN_USTAR_STAR;
|
10804
10832
|
} else if (lex_state_beg_p(parser)) {
|
10805
10833
|
type = PM_TOKEN_USTAR_STAR;
|
10834
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10835
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
|
10806
10836
|
}
|
10807
10837
|
|
10808
10838
|
if (lex_state_operator_p(parser)) {
|
@@ -10826,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10826
10856
|
type = PM_TOKEN_USTAR;
|
10827
10857
|
} else if (lex_state_beg_p(parser)) {
|
10828
10858
|
type = PM_TOKEN_USTAR;
|
10859
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
10860
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
|
10829
10861
|
}
|
10830
10862
|
|
10831
10863
|
if (lex_state_operator_p(parser)) {
|
@@ -10942,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10942
10974
|
// If we have quotes, then we're going to go until we find the
|
10943
10975
|
// end quote.
|
10944
10976
|
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
|
10977
|
+
if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
|
10945
10978
|
parser->current.end++;
|
10946
10979
|
}
|
10947
10980
|
}
|
@@ -10999,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10999
11032
|
LEX(PM_TOKEN_LESS_LESS_EQUAL);
|
11000
11033
|
}
|
11001
11034
|
|
11035
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11036
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
|
11037
|
+
}
|
11038
|
+
|
11002
11039
|
if (lex_state_operator_p(parser)) {
|
11003
11040
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11004
11041
|
} else {
|
@@ -11112,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
|
|
11112
11149
|
type = PM_TOKEN_UAMPERSAND;
|
11113
11150
|
} else if (lex_state_beg_p(parser)) {
|
11114
11151
|
type = PM_TOKEN_UAMPERSAND;
|
11152
|
+
} else if (ambiguous_operator_p(parser, space_seen)) {
|
11153
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
|
11115
11154
|
}
|
11116
11155
|
|
11117
11156
|
if (lex_state_operator_p(parser)) {
|
@@ -11186,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11186
11225
|
LEX(PM_TOKEN_UPLUS);
|
11187
11226
|
}
|
11188
11227
|
|
11228
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11229
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
|
11230
|
+
}
|
11231
|
+
|
11189
11232
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11190
11233
|
LEX(PM_TOKEN_PLUS);
|
11191
11234
|
}
|
@@ -11223,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11223
11266
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
11224
11267
|
}
|
11225
11268
|
|
11269
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11270
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
|
11271
|
+
}
|
11272
|
+
|
11226
11273
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
11227
11274
|
LEX(PM_TOKEN_MINUS);
|
11228
11275
|
}
|
@@ -11321,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11321
11368
|
LEX(PM_TOKEN_REGEXP_BEGIN);
|
11322
11369
|
}
|
11323
11370
|
|
11371
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11372
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
|
11373
|
+
}
|
11374
|
+
|
11324
11375
|
if (lex_state_operator_p(parser)) {
|
11325
11376
|
lex_state_set(parser, PM_LEX_STATE_ARG);
|
11326
11377
|
} else {
|
@@ -11356,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11356
11407
|
// operator because we don't want to move into the string
|
11357
11408
|
// lex mode unnecessarily.
|
11358
11409
|
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
|
11359
|
-
pm_parser_err_current(parser,
|
11410
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
|
11360
11411
|
LEX(PM_TOKEN_PERCENT);
|
11361
11412
|
}
|
11362
11413
|
|
@@ -11375,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11375
11426
|
|
11376
11427
|
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
11377
11428
|
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
11378
|
-
|
11379
|
-
if (parser->current.end < parser->end) {
|
11380
|
-
LEX(PM_TOKEN_STRING_BEGIN);
|
11381
|
-
}
|
11429
|
+
LEX(PM_TOKEN_STRING_BEGIN);
|
11382
11430
|
}
|
11383
11431
|
|
11384
11432
|
// Delimiters for %-literals cannot be alphanumeric. We
|
@@ -11505,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
|
|
11505
11553
|
}
|
11506
11554
|
}
|
11507
11555
|
|
11556
|
+
if (ambiguous_operator_p(parser, space_seen)) {
|
11557
|
+
PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
|
11558
|
+
}
|
11559
|
+
|
11508
11560
|
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
|
11509
11561
|
LEX(PM_TOKEN_PERCENT);
|
11510
11562
|
}
|
@@ -12315,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
|
|
12315
12367
|
|
12316
12368
|
// If we are immediately following a newline and we have hit the
|
12317
12369
|
// terminator, then we need to return the ending of the heredoc.
|
12318
|
-
if (
|
12370
|
+
if (current_token_starts_line(parser)) {
|
12319
12371
|
const uint8_t *start = parser->current.start;
|
12320
|
-
|
12372
|
+
|
12373
|
+
if (!line_continuation && (start + ident_length <= parser->end)) {
|
12321
12374
|
const uint8_t *newline = next_newline(start, parser->end - start);
|
12322
12375
|
const uint8_t *ident_end = newline;
|
12323
12376
|
const uint8_t *terminator_end = newline;
|
@@ -12473,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
|
|
12473
12526
|
}
|
12474
12527
|
|
12475
12528
|
parser->current.end = breakpoint + 1;
|
12476
|
-
|
12477
|
-
|
12478
|
-
pm_token_buffer_flush(parser, &token_buffer);
|
12479
|
-
LEX(PM_TOKEN_STRING_CONTENT);
|
12480
|
-
}
|
12529
|
+
pm_token_buffer_flush(parser, &token_buffer);
|
12530
|
+
LEX(PM_TOKEN_STRING_CONTENT);
|
12481
12531
|
}
|
12482
12532
|
|
12483
12533
|
// Otherwise we hit a newline and it wasn't followed by
|
@@ -13112,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
|
13112
13162
|
return (pm_node_t *) result;
|
13113
13163
|
}
|
13114
13164
|
|
13165
|
+
/**
|
13166
|
+
* When an implicit local variable is written to or targeted, it becomes a
|
13167
|
+
* regular, named local variable. This function removes it from the list of
|
13168
|
+
* implicit parameters when that happens.
|
13169
|
+
*/
|
13170
|
+
static void
|
13171
|
+
parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
|
13172
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
13173
|
+
|
13174
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
13175
|
+
if (implicit_parameters->nodes[index] == node) {
|
13176
|
+
// If the node is not the last one in the list, we need to shift the
|
13177
|
+
// remaining nodes down to fill the gap. This is extremely unlikely
|
13178
|
+
// to happen.
|
13179
|
+
if (index != implicit_parameters->size - 1) {
|
13180
|
+
memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
|
13181
|
+
}
|
13182
|
+
|
13183
|
+
implicit_parameters->size--;
|
13184
|
+
break;
|
13185
|
+
}
|
13186
|
+
}
|
13187
|
+
}
|
13188
|
+
|
13115
13189
|
/**
|
13116
13190
|
* Convert the given node into a valid target node.
|
13191
|
+
*
|
13192
|
+
* @param multiple Whether or not this target is part of a larger set of
|
13193
|
+
* targets. If it is, then the &. operator is not allowed.
|
13194
|
+
* @param splat Whether or not this target is a child of a splat target. If it
|
13195
|
+
* is, then fewer patterns are allowed.
|
13117
13196
|
*/
|
13118
13197
|
static pm_node_t *
|
13119
|
-
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13198
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
|
13120
13199
|
switch (PM_NODE_TYPE(target)) {
|
13121
13200
|
case PM_MISSING_NODE:
|
13122
13201
|
return target;
|
@@ -13162,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13162
13241
|
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
|
13163
13242
|
return target;
|
13164
13243
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13165
|
-
|
13244
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13245
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
|
13246
|
+
parse_target_implicit_parameter(parser, target);
|
13247
|
+
}
|
13166
13248
|
|
13167
13249
|
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
|
13168
13250
|
uint32_t name = cast->name;
|
@@ -13174,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13174
13256
|
|
13175
13257
|
return target;
|
13176
13258
|
}
|
13259
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13260
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13261
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13262
|
+
|
13263
|
+
parse_target_implicit_parameter(parser, target);
|
13264
|
+
pm_node_destroy(parser, target);
|
13265
|
+
|
13266
|
+
return node;
|
13267
|
+
}
|
13177
13268
|
case PM_INSTANCE_VARIABLE_READ_NODE:
|
13178
13269
|
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
|
13179
13270
|
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
|
13180
13271
|
return target;
|
13181
13272
|
case PM_MULTI_TARGET_NODE:
|
13273
|
+
if (splat_parent) {
|
13274
|
+
// Multi target is not accepted in all positions. If this is one
|
13275
|
+
// of them, then we need to add an error.
|
13276
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
13277
|
+
}
|
13278
|
+
|
13182
13279
|
return target;
|
13183
13280
|
case PM_SPLAT_NODE: {
|
13184
13281
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
13185
13282
|
|
13186
13283
|
if (splat->expression != NULL) {
|
13187
|
-
splat->expression = parse_target(parser, splat->expression, multiple);
|
13284
|
+
splat->expression = parse_target(parser, splat->expression, multiple, true);
|
13188
13285
|
}
|
13189
13286
|
|
13190
13287
|
return (pm_node_t *) splat;
|
@@ -13254,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
|
13254
13351
|
*/
|
13255
13352
|
static pm_node_t *
|
13256
13353
|
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13257
|
-
pm_node_t *result = parse_target(parser, target, multiple);
|
13354
|
+
pm_node_t *result = parse_target(parser, target, multiple, false);
|
13258
13355
|
|
13259
|
-
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13356
|
+
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
|
13357
|
+
// parens after the targets.
|
13260
13358
|
if (
|
13261
13359
|
!match1(parser, PM_TOKEN_EQUAL) &&
|
13262
13360
|
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
|
@@ -13326,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13326
13424
|
return (pm_node_t *) node;
|
13327
13425
|
}
|
13328
13426
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
13329
|
-
pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
|
13330
13427
|
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
|
13331
13428
|
|
13332
13429
|
pm_constant_id_t name = local_read->name;
|
13430
|
+
pm_location_t name_loc = target->location;
|
13431
|
+
|
13333
13432
|
uint32_t depth = local_read->depth;
|
13334
|
-
|
13433
|
+
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
|
13335
13434
|
|
13336
|
-
|
13435
|
+
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
|
13436
|
+
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
|
13437
|
+
PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
|
13438
|
+
parse_target_implicit_parameter(parser, target);
|
13439
|
+
}
|
13440
|
+
|
13441
|
+
pm_locals_unread(&scope->locals, name);
|
13337
13442
|
pm_node_destroy(parser, target);
|
13338
13443
|
|
13339
13444
|
return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
|
13340
13445
|
}
|
13446
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
13447
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
13448
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
|
13449
|
+
|
13450
|
+
parse_target_implicit_parameter(parser, target);
|
13451
|
+
pm_node_destroy(parser, target);
|
13452
|
+
|
13453
|
+
return node;
|
13454
|
+
}
|
13341
13455
|
case PM_INSTANCE_VARIABLE_READ_NODE: {
|
13342
13456
|
pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
|
13343
13457
|
pm_node_destroy(parser, target);
|
@@ -13491,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13491
13605
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13492
13606
|
|
13493
13607
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13494
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13608
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
|
13495
13609
|
|
13496
13610
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13497
13611
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13507,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13507
13621
|
|
13508
13622
|
if (token_begins_expression_p(parser->current.type)) {
|
13509
13623
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13510
|
-
name = parse_target(parser, name, true);
|
13624
|
+
name = parse_target(parser, name, true, true);
|
13511
13625
|
}
|
13512
13626
|
|
13513
13627
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13515,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13515
13629
|
has_rest = true;
|
13516
13630
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13517
13631
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13518
|
-
target = parse_target(parser, target, true);
|
13632
|
+
target = parse_target(parser, target, true, false);
|
13519
13633
|
|
13520
13634
|
pm_multi_target_node_targets_append(parser, result, target);
|
13521
13635
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13552,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
|
|
13552
13666
|
*/
|
13553
13667
|
static pm_statements_node_t *
|
13554
13668
|
parse_statements(pm_parser_t *parser, pm_context_t context) {
|
13555
|
-
// First, skip past any optional terminators that might be at the beginning
|
13556
|
-
// the statements.
|
13669
|
+
// First, skip past any optional terminators that might be at the beginning
|
13670
|
+
// of the statements.
|
13557
13671
|
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
|
13558
13672
|
|
13559
13673
|
// If we have a terminator, then we can just return NULL.
|
@@ -13569,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13569
13683
|
pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
|
13570
13684
|
pm_statements_node_body_append(parser, statements, node);
|
13571
13685
|
|
13572
|
-
// If we're recovering from a syntax error, then we need to stop parsing
|
13573
|
-
// statements now.
|
13686
|
+
// If we're recovering from a syntax error, then we need to stop parsing
|
13687
|
+
// the statements now.
|
13574
13688
|
if (parser->recovering) {
|
13575
|
-
// If this is the level of context where the recovery has happened,
|
13576
|
-
// we can mark the parser as done recovering.
|
13689
|
+
// If this is the level of context where the recovery has happened,
|
13690
|
+
// then we can mark the parser as done recovering.
|
13577
13691
|
if (context_terminator(context, &parser->current)) parser->recovering = false;
|
13578
13692
|
break;
|
13579
13693
|
}
|
13580
13694
|
|
13581
|
-
// If we have a terminator, then we will parse all consecutive
|
13582
|
-
// and then continue parsing the statements list.
|
13695
|
+
// If we have a terminator, then we will parse all consecutive
|
13696
|
+
// terminators and then continue parsing the statements list.
|
13583
13697
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13584
|
-
// If we have a terminator, then we will continue parsing the
|
13585
|
-
// list.
|
13698
|
+
// If we have a terminator, then we will continue parsing the
|
13699
|
+
// statements list.
|
13586
13700
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13587
13701
|
if (context_terminator(context, &parser->current)) break;
|
13588
13702
|
|
@@ -13590,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13590
13704
|
continue;
|
13591
13705
|
}
|
13592
13706
|
|
13593
|
-
// At this point we have a list of statements that are not terminated by
|
13594
|
-
// newline or semicolon. At this point we need to check if we're at
|
13595
|
-
// of the statements list. If we are, then we should break out
|
13707
|
+
// At this point we have a list of statements that are not terminated by
|
13708
|
+
// a newline or semicolon. At this point we need to check if we're at
|
13709
|
+
// the end of the statements list. If we are, then we should break out
|
13710
|
+
// of the loop.
|
13596
13711
|
if (context_terminator(context, &parser->current)) break;
|
13597
13712
|
|
13598
13713
|
// At this point, we have a syntax error, because the statement was not
|
13599
13714
|
// terminated by a newline or semicolon, and we're not at the end of the
|
13600
|
-
// statements list. Ideally we should scan forward to determine if we
|
13601
|
-
// insert a missing terminator or break out of parsing the
|
13602
|
-
// at this point.
|
13715
|
+
// statements list. Ideally we should scan forward to determine if we
|
13716
|
+
// should insert a missing terminator or break out of parsing the
|
13717
|
+
// statements list at this point.
|
13603
13718
|
//
|
13604
|
-
// We don't have that yet, so instead we'll do a more naive approach. If
|
13605
|
-
// were unable to parse an expression, then we will skip past this
|
13606
|
-
// continue parsing the statements list. Otherwise we'll add
|
13607
|
-
// continue parsing the statements list.
|
13719
|
+
// We don't have that yet, so instead we'll do a more naive approach. If
|
13720
|
+
// we were unable to parse an expression, then we will skip past this
|
13721
|
+
// token and continue parsing the statements list. Otherwise we'll add
|
13722
|
+
// an error and continue parsing the statements list.
|
13608
13723
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13609
13724
|
parser_lex(parser);
|
13610
13725
|
|
13611
13726
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13612
13727
|
if (context_terminator(context, &parser->current)) break;
|
13613
|
-
} else if (!
|
13728
|
+
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
13614
13729
|
// This is an inlined version of accept1 because the error that we
|
13615
13730
|
// want to add has varargs. If this happens again, we should
|
13616
13731
|
// probably extract a helper function.
|
@@ -13632,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13632
13747
|
*/
|
13633
13748
|
static void
|
13634
13749
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13635
|
-
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13750
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
|
13636
13751
|
|
13637
13752
|
if (duplicated != NULL) {
|
13638
13753
|
pm_buffer_t buffer = { 0 };
|
@@ -13658,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13658
13773
|
*/
|
13659
13774
|
static void
|
13660
13775
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13661
|
-
|
13776
|
+
pm_node_t *previous;
|
13777
|
+
|
13778
|
+
if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
|
13662
13779
|
pm_diagnostic_list_append_format(
|
13663
13780
|
&parser->warning_list,
|
13664
13781
|
node->location.start,
|
13665
13782
|
node->location.end,
|
13666
13783
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
13667
|
-
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
|
13784
|
+
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
|
13785
|
+
pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
|
13668
13786
|
);
|
13669
13787
|
}
|
13670
13788
|
}
|
@@ -14276,7 +14394,7 @@ parse_parameters(
|
|
14276
14394
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14277
14395
|
|
14278
14396
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
|
14279
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14397
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14280
14398
|
|
14281
14399
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
14282
14400
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
@@ -14289,7 +14407,7 @@ parse_parameters(
|
|
14289
14407
|
// If the value of the parameter increased the number of
|
14290
14408
|
// reads of that parameter, then we need to warn that we
|
14291
14409
|
// have a circular definition.
|
14292
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14410
|
+
if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14293
14411
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
14294
14412
|
}
|
14295
14413
|
|
@@ -14368,10 +14486,10 @@ parse_parameters(
|
|
14368
14486
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
14369
14487
|
|
14370
14488
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
|
14371
|
-
uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
|
14489
|
+
uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14372
14490
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
14373
14491
|
|
14374
|
-
if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
|
14492
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14375
14493
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
|
14376
14494
|
}
|
14377
14495
|
|
@@ -14543,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14543
14661
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14544
14662
|
|
14545
14663
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14546
|
-
reference = parse_target(parser, reference, false);
|
14664
|
+
reference = parse_target(parser, reference, false, false);
|
14547
14665
|
|
14548
14666
|
pm_rescue_node_reference_set(rescue, reference);
|
14549
14667
|
break;
|
@@ -14573,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14573
14691
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14574
14692
|
|
14575
14693
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14576
|
-
reference = parse_target(parser, reference, false);
|
14694
|
+
reference = parse_target(parser, reference, false, false);
|
14577
14695
|
|
14578
14696
|
pm_rescue_node_reference_set(rescue, reference);
|
14579
14697
|
break;
|
@@ -14778,6 +14896,28 @@ parse_block_parameters(
|
|
14778
14896
|
return block_parameters;
|
14779
14897
|
}
|
14780
14898
|
|
14899
|
+
/**
|
14900
|
+
* Return true if any of the visible scopes to the current context are using
|
14901
|
+
* numbered parameters.
|
14902
|
+
*/
|
14903
|
+
static bool
|
14904
|
+
outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
14905
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14906
|
+
if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
|
14907
|
+
}
|
14908
|
+
|
14909
|
+
return false;
|
14910
|
+
}
|
14911
|
+
|
14912
|
+
/**
 * The source-level names of the numbered parameters, _1 through _9, stored at
 * indices 0 through 8. They are kept as string constants so that inserting one
 * of them into the constant pool can reuse the constant string and does not
 * require an allocation.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
|
14920
|
+
|
14781
14921
|
/**
|
14782
14922
|
* Return the node that should be used in the parameters field of a block-like
|
14783
14923
|
* (block or lambda) node, depending on the kind of parameters that were
|
@@ -14785,31 +14925,79 @@ parse_block_parameters(
|
|
14785
14925
|
*/
|
14786
14926
|
static pm_node_t *
|
14787
14927
|
parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
|
14788
|
-
|
14928
|
+
pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
|
14929
|
+
|
14930
|
+
// If we have ordinary parameters, then we will return them as the set of
|
14931
|
+
// parameters.
|
14932
|
+
if (parameters != NULL) {
|
14933
|
+
// If we also have implicit parameters, then this is an error.
|
14934
|
+
if (implicit_parameters->size > 0) {
|
14935
|
+
pm_node_t *node = implicit_parameters->nodes[0];
|
14936
|
+
|
14937
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14938
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
|
14939
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14940
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
|
14941
|
+
} else {
|
14942
|
+
assert(false && "unreachable");
|
14943
|
+
}
|
14944
|
+
}
|
14789
14945
|
|
14790
|
-
if (masked == PM_SCOPE_PARAMETERS_NONE) {
|
14791
|
-
assert(parameters == NULL);
|
14792
|
-
return NULL;
|
14793
|
-
} else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
|
14794
|
-
assert(parameters != NULL);
|
14795
14946
|
return parameters;
|
14796
|
-
}
|
14797
|
-
|
14947
|
+
}
|
14948
|
+
|
14949
|
+
// If we don't have any implicit parameters, then the set of parameters is
|
14950
|
+
// NULL.
|
14951
|
+
if (implicit_parameters->size == 0) {
|
14952
|
+
return NULL;
|
14953
|
+
}
|
14954
|
+
|
14955
|
+
// If we don't have ordinary parameters, then we now must validate our set
|
14956
|
+
// of implicit parameters. We can only have numbered parameters or it, but
|
14957
|
+
// they cannot be mixed.
|
14958
|
+
uint8_t numbered_parameter = 0;
|
14959
|
+
bool it_parameter = false;
|
14960
|
+
|
14961
|
+
for (size_t index = 0; index < implicit_parameters->size; index++) {
|
14962
|
+
pm_node_t *node = implicit_parameters->nodes[index];
|
14963
|
+
|
14964
|
+
if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
|
14965
|
+
if (it_parameter) {
|
14966
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
|
14967
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
14968
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
|
14969
|
+
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
|
14970
|
+
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
|
14971
|
+
} else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
14972
|
+
numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
|
14973
|
+
} else {
|
14974
|
+
assert(false && "unreachable");
|
14975
|
+
}
|
14976
|
+
} else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
14977
|
+
if (numbered_parameter > 0) {
|
14978
|
+
pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
|
14979
|
+
} else {
|
14980
|
+
it_parameter = true;
|
14981
|
+
}
|
14982
|
+
}
|
14983
|
+
}
|
14798
14984
|
|
14799
|
-
|
14800
|
-
|
14801
|
-
|
14802
|
-
|
14985
|
+
if (numbered_parameter > 0) {
|
14986
|
+
// Go through the parent scopes and mark them as being disallowed from
|
14987
|
+
// using numbered parameters because this inner scope is using them.
|
14988
|
+
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
|
14989
|
+
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
|
14803
14990
|
}
|
14804
14991
|
|
14805
|
-
|
14806
|
-
|
14807
|
-
|
14992
|
+
const pm_location_t location = { .start = opening->start, .end = closing->end };
|
14993
|
+
return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
|
14994
|
+
}
|
14995
|
+
|
14996
|
+
if (it_parameter) {
|
14808
14997
|
return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
|
14809
|
-
} else {
|
14810
|
-
assert(false && "unreachable");
|
14811
|
-
return NULL;
|
14812
14998
|
}
|
14999
|
+
|
15000
|
+
return NULL;
|
14813
15001
|
}
|
14814
15002
|
|
14815
15003
|
/**
|
@@ -14826,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
|
|
14826
15014
|
pm_block_parameters_node_t *block_parameters = NULL;
|
14827
15015
|
|
14828
15016
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
14829
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
14830
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
14831
|
-
|
14832
15017
|
pm_token_t block_parameters_opening = parser->previous;
|
14833
15018
|
if (match1(parser, PM_TOKEN_PIPE)) {
|
14834
15019
|
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
|
@@ -15326,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
|
|
15326
15511
|
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
|
15327
15512
|
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
|
15328
15513
|
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
|
15329
|
-
case PM_NUMBERED_REFERENCE_READ_NODE
|
15514
|
+
case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
|
15330
15515
|
|
15331
15516
|
// Assert here that the flags are the same so that we can safely switch the type
|
15332
15517
|
// of the node without having to move the flags.
|
@@ -15384,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15384
15569
|
// "aaa #{bbb} #@ccc ddd"
|
15385
15570
|
// ^^^^^^
|
15386
15571
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
15572
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15573
|
+
// even though it is known at parse time.
|
15574
|
+
parser->explicit_encoding = NULL;
|
15575
|
+
|
15387
15576
|
pm_lex_state_t state = parser->lex_state;
|
15388
15577
|
int brace_nesting = parser->brace_nesting;
|
15389
15578
|
|
@@ -15406,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
|
|
15406
15595
|
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
|
15407
15596
|
pm_token_t closing = parser->previous;
|
15408
15597
|
|
15598
|
+
// If this set of embedded statements only contains a single
|
15599
|
+
// statement, then Ruby does not consider it as a possible statement
|
15600
|
+
// that could emit a line event.
|
15601
|
+
if (statements != NULL && statements->body.size == 1) {
|
15602
|
+
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
|
15603
|
+
}
|
15604
|
+
|
15409
15605
|
return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
|
15410
15606
|
}
|
15411
15607
|
|
@@ -15416,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
|
|
15416
15612
|
// "aaa #{bbb} #@ccc ddd"
|
15417
15613
|
// ^^^^^
|
15418
15614
|
case PM_TOKEN_EMBVAR: {
|
15615
|
+
// Ruby disallows seeing encoding around interpolation in strings,
|
15616
|
+
// even though it is known at parse time.
|
15617
|
+
parser->explicit_encoding = NULL;
|
15618
|
+
|
15419
15619
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
15420
15620
|
parser_lex(parser);
|
15421
15621
|
|
@@ -15731,74 +15931,43 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15731
15931
|
}
|
15732
15932
|
|
15733
15933
|
/**
|
15734
|
-
*
|
15735
|
-
*
|
15934
|
+
* Parse an identifier into a local variable read. If the local variable
|
15935
|
+
* is not found, it returns NULL instead.
|
15736
15936
|
*/
|
15737
|
-
static
|
15738
|
-
|
15739
|
-
|
15740
|
-
|
15741
|
-
}
|
15937
|
+
static pm_node_t *
|
15938
|
+
parse_variable(pm_parser_t *parser) {
|
15939
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
15940
|
+
int depth;
|
15742
15941
|
|
15743
|
-
|
15744
|
-
|
15745
|
-
|
15746
|
-
/**
|
15747
|
-
* These are the names of the various numbered parameters. We have them here so
|
15748
|
-
* that when we insert them into the constant pool we can use a constant string
|
15749
|
-
* and not have to allocate.
|
15750
|
-
*/
|
15751
|
-
static const char * const pm_numbered_parameter_names[] = {
|
15752
|
-
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
15753
|
-
};
|
15754
|
-
|
15755
|
-
/**
|
15756
|
-
* Parse an identifier into either a local variable read. If the local variable
|
15757
|
-
* is not found, it returns NULL instead.
|
15758
|
-
*/
|
15759
|
-
static pm_local_variable_read_node_t *
|
15760
|
-
parse_variable(pm_parser_t *parser) {
|
15761
|
-
int depth;
|
15762
|
-
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
15763
|
-
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
15764
|
-
}
|
15942
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
15943
|
+
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
15944
|
+
}
|
15765
15945
|
|
15766
15946
|
pm_scope_t *current_scope = parser->current_scope;
|
15767
|
-
if (!current_scope->closed && current_scope->
|
15768
|
-
|
15769
|
-
|
15770
|
-
|
15771
|
-
|
15772
|
-
|
15773
|
-
|
15774
|
-
|
15775
|
-
|
15776
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
15777
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
15778
|
-
} else {
|
15779
|
-
// Indicate that this scope is using numbered params so that child
|
15780
|
-
// scopes cannot. We subtract the value for the character '0' to get
|
15781
|
-
// the actual integer value of the number (only _1 through _9 are
|
15782
|
-
// valid).
|
15783
|
-
int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
|
15784
|
-
current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
|
15785
|
-
|
15786
|
-
if (numbered_parameters > current_scope->numbered_parameters) {
|
15787
|
-
current_scope->numbered_parameters = numbered_parameters;
|
15947
|
+
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
15948
|
+
if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
15949
|
+
// When you use a numbered parameter, it implies the existence of
|
15950
|
+
// all of the locals that exist before it. For example, referencing
|
15951
|
+
// _2 means that _1 must exist. Therefore here we loop through all
|
15952
|
+
// of the possibilities and add them into the constant pool.
|
15953
|
+
uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
|
15954
|
+
for (uint8_t number = 1; number <= maximum; number++) {
|
15955
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
|
15788
15956
|
}
|
15789
15957
|
|
15790
|
-
|
15791
|
-
|
15792
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
15793
|
-
// loop through all of the possibilities and add them into the
|
15794
|
-
// constant pool.
|
15795
|
-
for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
|
15796
|
-
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
|
15958
|
+
if (!match1(parser, PM_TOKEN_EQUAL)) {
|
15959
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
|
15797
15960
|
}
|
15798
15961
|
|
15799
|
-
|
15800
|
-
|
15801
|
-
|
15962
|
+
pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
|
15963
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15964
|
+
|
15965
|
+
return node;
|
15966
|
+
} else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
15967
|
+
pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
|
15968
|
+
pm_node_list_append(&current_scope->implicit_parameters, node);
|
15969
|
+
|
15970
|
+
return node;
|
15802
15971
|
}
|
15803
15972
|
}
|
15804
15973
|
|
@@ -15813,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
15813
15982
|
pm_node_flags_t flags = 0;
|
15814
15983
|
|
15815
15984
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
15816
|
-
|
15817
|
-
if (node != NULL) return
|
15985
|
+
pm_node_t *node = parse_variable(parser);
|
15986
|
+
if (node != NULL) return node;
|
15818
15987
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
15819
15988
|
}
|
15820
15989
|
|
@@ -15932,6 +16101,230 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15932
16101
|
nodes->size = write_index;
|
15933
16102
|
}
|
15934
16103
|
|
16104
|
+
/**
|
16105
|
+
* Return a string content token at a particular location that is empty.
|
16106
|
+
*/
|
16107
|
+
static pm_token_t
|
16108
|
+
parse_strings_empty_content(const uint8_t *location) {
|
16109
|
+
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16110
|
+
}
|
16111
|
+
|
16112
|
+
/**
|
16113
|
+
* Parse a set of strings that could be concatenated together.
|
16114
|
+
*/
|
16115
|
+
static inline pm_node_t *
|
16116
|
+
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16117
|
+
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16118
|
+
|
16119
|
+
bool concating = false;
|
16120
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16121
|
+
|
16122
|
+
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16123
|
+
pm_node_t *node = NULL;
|
16124
|
+
|
16125
|
+
// Here we have found a string literal. We'll parse it and add it to
|
16126
|
+
// the list of strings.
|
16127
|
+
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16128
|
+
assert(lex_mode->mode == PM_LEX_STRING);
|
16129
|
+
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16130
|
+
|
16131
|
+
pm_token_t opening = parser->current;
|
16132
|
+
parser_lex(parser);
|
16133
|
+
|
16134
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16135
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16136
|
+
// If we get here, then we have an end immediately after a
|
16137
|
+
// start. In that case we'll create an empty content token and
|
16138
|
+
// return an uninterpolated string.
|
16139
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16140
|
+
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16141
|
+
|
16142
|
+
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16143
|
+
node = (pm_node_t *) string;
|
16144
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16145
|
+
// If we get here, then we have an end of a label immediately
|
16146
|
+
// after a start. In that case we'll create an empty symbol
|
16147
|
+
// node.
|
16148
|
+
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16149
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16150
|
+
|
16151
|
+
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16152
|
+
node = (pm_node_t *) symbol;
|
16153
|
+
} else if (!lex_interpolation) {
|
16154
|
+
// If we don't accept interpolation then we expect the string to
|
16155
|
+
// start with a single string content node.
|
16156
|
+
pm_string_t unescaped;
|
16157
|
+
pm_token_t content;
|
16158
|
+
|
16159
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
16160
|
+
unescaped = PM_STRING_EMPTY;
|
16161
|
+
content = not_provided(parser);
|
16162
|
+
} else {
|
16163
|
+
unescaped = parser->current_string;
|
16164
|
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16165
|
+
content = parser->previous;
|
16166
|
+
}
|
16167
|
+
|
16168
|
+
// It is unfortunately possible to have multiple string content
|
16169
|
+
// nodes in a row in the case that there's heredoc content in
|
16170
|
+
// the middle of the string, like this cursed example:
|
16171
|
+
//
|
16172
|
+
// <<-END+'b
|
16173
|
+
// a
|
16174
|
+
// END
|
16175
|
+
// c'+'d'
|
16176
|
+
//
|
16177
|
+
// In that case we need to switch to an interpolated string to
|
16178
|
+
// be able to contain all of the parts.
|
16179
|
+
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16180
|
+
pm_node_list_t parts = { 0 };
|
16181
|
+
|
16182
|
+
pm_token_t delimiters = not_provided(parser);
|
16183
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16184
|
+
pm_node_list_append(&parts, part);
|
16185
|
+
|
16186
|
+
do {
|
16187
|
+
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16188
|
+
pm_node_list_append(&parts, part);
|
16189
|
+
parser_lex(parser);
|
16190
|
+
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16191
|
+
|
16192
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16193
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16194
|
+
|
16195
|
+
pm_node_list_free(&parts);
|
16196
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16197
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16198
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16199
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16200
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16201
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16202
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16203
|
+
} else {
|
16204
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16205
|
+
parser->previous.start = parser->previous.end;
|
16206
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16207
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16208
|
+
}
|
16209
|
+
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16210
|
+
// In this case we've hit string content so we know the string
|
16211
|
+
// at least has something in it. We'll need to check if the
|
16212
|
+
// following token is the end (in which case we can return a
|
16213
|
+
// plain string) or if it's not then it has interpolation.
|
16214
|
+
pm_token_t content = parser->current;
|
16215
|
+
pm_string_t unescaped = parser->current_string;
|
16216
|
+
parser_lex(parser);
|
16217
|
+
|
16218
|
+
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16219
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16220
|
+
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16221
|
+
|
16222
|
+
// Kind of odd behavior, but basically if we have an
|
16223
|
+
// unterminated string and it ends in a newline, we back up one
|
16224
|
+
// character so that the error message is on the last line of
|
16225
|
+
// content in the string.
|
16226
|
+
if (!accept1(parser, PM_TOKEN_STRING_END)) {
|
16227
|
+
const uint8_t *location = parser->previous.end;
|
16228
|
+
if (location > parser->start && location[-1] == '\n') location--;
|
16229
|
+
pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
|
16230
|
+
|
16231
|
+
parser->previous.start = parser->previous.end;
|
16232
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
16233
|
+
}
|
16234
|
+
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16235
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16236
|
+
} else {
|
16237
|
+
// If we get here, then we have interpolation so we'll need
|
16238
|
+
// to create a string or symbol node with interpolation.
|
16239
|
+
pm_node_list_t parts = { 0 };
|
16240
|
+
pm_token_t string_opening = not_provided(parser);
|
16241
|
+
pm_token_t string_closing = not_provided(parser);
|
16242
|
+
|
16243
|
+
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16244
|
+
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16245
|
+
pm_node_list_append(&parts, part);
|
16246
|
+
|
16247
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16248
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16249
|
+
pm_node_list_append(&parts, part);
|
16250
|
+
}
|
16251
|
+
}
|
16252
|
+
|
16253
|
+
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16254
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16255
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16256
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16257
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16258
|
+
} else {
|
16259
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16260
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16261
|
+
}
|
16262
|
+
|
16263
|
+
pm_node_list_free(&parts);
|
16264
|
+
}
|
16265
|
+
} else {
|
16266
|
+
// If we get here, then the first part of the string is not plain
|
16267
|
+
// string content, in which case we need to parse the string as an
|
16268
|
+
// interpolated string.
|
16269
|
+
pm_node_list_t parts = { 0 };
|
16270
|
+
pm_node_t *part;
|
16271
|
+
|
16272
|
+
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16273
|
+
if ((part = parse_string_part(parser)) != NULL) {
|
16274
|
+
pm_node_list_append(&parts, part);
|
16275
|
+
}
|
16276
|
+
}
|
16277
|
+
|
16278
|
+
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16279
|
+
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16280
|
+
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16281
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16282
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16283
|
+
} else {
|
16284
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16285
|
+
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16286
|
+
}
|
16287
|
+
|
16288
|
+
pm_node_list_free(&parts);
|
16289
|
+
}
|
16290
|
+
|
16291
|
+
if (current == NULL) {
|
16292
|
+
// If the node we just parsed is a symbol node, then we can't
|
16293
|
+
// concatenate it with anything else, so we can now return that
|
16294
|
+
// node.
|
16295
|
+
if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16296
|
+
return node;
|
16297
|
+
}
|
16298
|
+
|
16299
|
+
// If we don't already have a node, then it's fine and we can just
|
16300
|
+
// set the result to be the node we just parsed.
|
16301
|
+
current = node;
|
16302
|
+
} else {
|
16303
|
+
// Otherwise we need to check the type of the node we just parsed.
|
16304
|
+
// If it cannot be concatenated with the previous node, then we'll
|
16305
|
+
// need to add a syntax error.
|
16306
|
+
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
16307
|
+
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
16308
|
+
}
|
16309
|
+
|
16310
|
+
// If we haven't already created our container for concatenation,
|
16311
|
+
// we'll do that now.
|
16312
|
+
if (!concating) {
|
16313
|
+
concating = true;
|
16314
|
+
pm_token_t bounds = not_provided(parser);
|
16315
|
+
|
16316
|
+
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16317
|
+
pm_interpolated_string_node_append(container, current);
|
16318
|
+
current = (pm_node_t *) container;
|
16319
|
+
}
|
16320
|
+
|
16321
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16322
|
+
}
|
16323
|
+
}
|
16324
|
+
|
16325
|
+
return current;
|
16326
|
+
}
|
16327
|
+
|
15935
16328
|
#define PM_PARSE_PATTERN_SINGLE 0
|
15936
16329
|
#define PM_PARSE_PATTERN_TOP 1
|
15937
16330
|
#define PM_PARSE_PATTERN_MULTI 2
|
@@ -16214,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
16214
16607
|
*/
|
16215
16608
|
static void
|
16216
16609
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
16217
|
-
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
16610
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
|
16218
16611
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
16219
16612
|
}
|
16220
16613
|
}
|
@@ -16289,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
16289
16682
|
pm_node_list_append(&assocs, assoc);
|
16290
16683
|
}
|
16291
16684
|
} else {
|
16292
|
-
|
16293
|
-
|
16685
|
+
pm_node_t *key;
|
16686
|
+
|
16687
|
+
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16688
|
+
key = parse_strings(parser, NULL);
|
16689
|
+
|
16690
|
+
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
|
16691
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
|
16692
|
+
} else if (!pm_symbol_node_label_p(key)) {
|
16693
|
+
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16694
|
+
}
|
16695
|
+
} else {
|
16696
|
+
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
|
16697
|
+
key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16698
|
+
}
|
16294
16699
|
|
16295
16700
|
parse_pattern_hash_key(parser, &keys, key);
|
16296
16701
|
pm_node_t *value = NULL;
|
@@ -16502,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16502
16907
|
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
16503
16908
|
|
16504
16909
|
if (variable == NULL) {
|
16505
|
-
|
16506
|
-
|
16507
|
-
!parser->current_scope->closed &&
|
16508
|
-
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
16509
|
-
pm_token_is_it(parser->previous.start, parser->previous.end)
|
16510
|
-
) {
|
16511
|
-
pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
|
16512
|
-
if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
16513
|
-
variable = (pm_node_t *) read;
|
16514
|
-
} else {
|
16515
|
-
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16516
|
-
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16517
|
-
}
|
16910
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
16911
|
+
variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
|
16518
16912
|
}
|
16519
16913
|
|
16520
16914
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
@@ -16762,276 +17156,67 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
16762
17156
|
}
|
16763
17157
|
|
16764
17158
|
trailing_rest = true;
|
16765
|
-
} else {
|
16766
|
-
node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
|
16767
|
-
}
|
16768
|
-
|
16769
|
-
pm_node_list_append(&nodes, node);
|
16770
|
-
}
|
16771
|
-
|
16772
|
-
// If the first pattern and the last pattern are rest patterns, then we will
|
16773
|
-
// call this a find pattern, regardless of how many rest patterns are in
|
16774
|
-
// between because we know we already added the appropriate errors.
|
16775
|
-
// Otherwise we will create an array pattern.
|
16776
|
-
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
16777
|
-
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
16778
|
-
} else {
|
16779
|
-
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
16780
|
-
}
|
16781
|
-
|
16782
|
-
xfree(nodes.nodes);
|
16783
|
-
} else if (leading_rest) {
|
16784
|
-
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
16785
|
-
// array pattern, so we can go ahead and create that node.
|
16786
|
-
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
16787
|
-
}
|
16788
|
-
|
16789
|
-
return node;
|
16790
|
-
}
|
16791
|
-
|
16792
|
-
/**
|
16793
|
-
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
16794
|
-
* from its start bounds. If it's a compound node, then we will recursively
|
16795
|
-
* apply this function to its value.
|
16796
|
-
*/
|
16797
|
-
static inline void
|
16798
|
-
parse_negative_numeric(pm_node_t *node) {
|
16799
|
-
switch (PM_NODE_TYPE(node)) {
|
16800
|
-
case PM_INTEGER_NODE: {
|
16801
|
-
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
16802
|
-
cast->base.location.start--;
|
16803
|
-
cast->value.negative = true;
|
16804
|
-
break;
|
16805
|
-
}
|
16806
|
-
case PM_FLOAT_NODE: {
|
16807
|
-
pm_float_node_t *cast = (pm_float_node_t *) node;
|
16808
|
-
cast->base.location.start--;
|
16809
|
-
cast->value = -cast->value;
|
16810
|
-
break;
|
16811
|
-
}
|
16812
|
-
case PM_RATIONAL_NODE:
|
16813
|
-
node->location.start--;
|
16814
|
-
parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
|
16815
|
-
break;
|
16816
|
-
case PM_IMAGINARY_NODE:
|
16817
|
-
node->location.start--;
|
16818
|
-
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
16819
|
-
break;
|
16820
|
-
default:
|
16821
|
-
assert(false && "unreachable");
|
16822
|
-
break;
|
16823
|
-
}
|
16824
|
-
}
|
16825
|
-
|
16826
|
-
/**
|
16827
|
-
* Return a string content token at a particular location that is empty.
|
16828
|
-
*/
|
16829
|
-
static pm_token_t
|
16830
|
-
parse_strings_empty_content(const uint8_t *location) {
|
16831
|
-
return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
|
16832
|
-
}
|
16833
|
-
|
16834
|
-
/**
|
16835
|
-
* Parse a set of strings that could be concatenated together.
|
16836
|
-
*/
|
16837
|
-
static inline pm_node_t *
|
16838
|
-
parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
16839
|
-
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
16840
|
-
|
16841
|
-
bool concating = false;
|
16842
|
-
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
16843
|
-
|
16844
|
-
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
16845
|
-
pm_node_t *node = NULL;
|
16846
|
-
|
16847
|
-
// Here we have found a string literal. We'll parse it and add it to
|
16848
|
-
// the list of strings.
|
16849
|
-
const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
16850
|
-
assert(lex_mode->mode == PM_LEX_STRING);
|
16851
|
-
bool lex_interpolation = lex_mode->as.string.interpolation;
|
16852
|
-
|
16853
|
-
pm_token_t opening = parser->current;
|
16854
|
-
parser_lex(parser);
|
16855
|
-
|
16856
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16857
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16858
|
-
// If we get here, then we have an end immediately after a
|
16859
|
-
// start. In that case we'll create an empty content token and
|
16860
|
-
// return an uninterpolated string.
|
16861
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16862
|
-
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
|
16863
|
-
|
16864
|
-
pm_string_shared_init(&string->unescaped, content.start, content.end);
|
16865
|
-
node = (pm_node_t *) string;
|
16866
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16867
|
-
// If we get here, then we have an end of a label immediately
|
16868
|
-
// after a start. In that case we'll create an empty symbol
|
16869
|
-
// node.
|
16870
|
-
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
16871
|
-
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
16872
|
-
|
16873
|
-
pm_string_shared_init(&symbol->unescaped, content.start, content.end);
|
16874
|
-
node = (pm_node_t *) symbol;
|
16875
|
-
} else if (!lex_interpolation) {
|
16876
|
-
// If we don't accept interpolation then we expect the string to
|
16877
|
-
// start with a single string content node.
|
16878
|
-
pm_string_t unescaped;
|
16879
|
-
pm_token_t content;
|
16880
|
-
if (match1(parser, PM_TOKEN_EOF)) {
|
16881
|
-
unescaped = PM_STRING_EMPTY;
|
16882
|
-
content = not_provided(parser);
|
16883
|
-
} else {
|
16884
|
-
unescaped = parser->current_string;
|
16885
|
-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
|
16886
|
-
content = parser->previous;
|
16887
|
-
}
|
16888
|
-
|
16889
|
-
// It is unfortunately possible to have multiple string content
|
16890
|
-
// nodes in a row in the case that there's heredoc content in
|
16891
|
-
// the middle of the string, like this cursed example:
|
16892
|
-
//
|
16893
|
-
// <<-END+'b
|
16894
|
-
// a
|
16895
|
-
// END
|
16896
|
-
// c'+'d'
|
16897
|
-
//
|
16898
|
-
// In that case we need to switch to an interpolated string to
|
16899
|
-
// be able to contain all of the parts.
|
16900
|
-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16901
|
-
pm_node_list_t parts = { 0 };
|
16902
|
-
|
16903
|
-
pm_token_t delimiters = not_provided(parser);
|
16904
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
|
16905
|
-
pm_node_list_append(&parts, part);
|
16906
|
-
|
16907
|
-
do {
|
16908
|
-
part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
|
16909
|
-
pm_node_list_append(&parts, part);
|
16910
|
-
parser_lex(parser);
|
16911
|
-
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
16912
|
-
|
16913
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16914
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16915
|
-
|
16916
|
-
pm_node_list_free(&parts);
|
16917
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16918
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16919
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16920
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16921
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16922
|
-
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
16923
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16924
|
-
} else {
|
16925
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
16926
|
-
parser->previous.start = parser->previous.end;
|
16927
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
16928
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
16929
|
-
}
|
16930
|
-
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
16931
|
-
// In this case we've hit string content so we know the string
|
16932
|
-
// at least has something in it. We'll need to check if the
|
16933
|
-
// following token is the end (in which case we can return a
|
16934
|
-
// plain string) or if it's not then it has interpolation.
|
16935
|
-
pm_token_t content = parser->current;
|
16936
|
-
pm_string_t unescaped = parser->current_string;
|
16937
|
-
parser_lex(parser);
|
16938
|
-
|
16939
|
-
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
16940
|
-
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
16941
|
-
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16942
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16943
|
-
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16944
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16945
|
-
} else {
|
16946
|
-
// If we get here, then we have interpolation so we'll need
|
16947
|
-
// to create a string or symbol node with interpolation.
|
16948
|
-
pm_node_list_t parts = { 0 };
|
16949
|
-
pm_token_t string_opening = not_provided(parser);
|
16950
|
-
pm_token_t string_closing = not_provided(parser);
|
16951
|
-
|
16952
|
-
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
|
16953
|
-
pm_node_flag_set(part, parse_unescaped_encoding(parser));
|
16954
|
-
pm_node_list_append(&parts, part);
|
16955
|
-
|
16956
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16957
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16958
|
-
pm_node_list_append(&parts, part);
|
16959
|
-
}
|
16960
|
-
}
|
16961
|
-
|
16962
|
-
if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16963
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16964
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16965
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16966
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16967
|
-
} else {
|
16968
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16969
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
16970
|
-
}
|
16971
|
-
|
16972
|
-
pm_node_list_free(&parts);
|
16973
|
-
}
|
16974
|
-
} else {
|
16975
|
-
// If we get here, then the first part of the string is not plain
|
16976
|
-
// string content, in which case we need to parse the string as an
|
16977
|
-
// interpolated string.
|
16978
|
-
pm_node_list_t parts = { 0 };
|
16979
|
-
pm_node_t *part;
|
16980
|
-
|
16981
|
-
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
|
16982
|
-
if ((part = parse_string_part(parser)) != NULL) {
|
16983
|
-
pm_node_list_append(&parts, part);
|
16984
|
-
}
|
16985
|
-
}
|
16986
|
-
|
16987
|
-
if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16988
|
-
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
16989
|
-
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16990
|
-
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
|
16991
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
|
16992
|
-
} else {
|
16993
|
-
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
|
16994
|
-
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
17159
|
+
} else {
|
17160
|
+
node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
|
16995
17161
|
}
|
16996
17162
|
|
16997
|
-
|
17163
|
+
pm_node_list_append(&nodes, node);
|
16998
17164
|
}
|
16999
17165
|
|
17000
|
-
|
17001
|
-
|
17002
|
-
|
17003
|
-
|
17004
|
-
|
17005
|
-
|
17006
|
-
}
|
17007
|
-
|
17008
|
-
// If we don't already have a node, then it's fine and we can just
|
17009
|
-
// set the result to be the node we just parsed.
|
17010
|
-
current = node;
|
17166
|
+
// If the first pattern and the last pattern are rest patterns, then we will
|
17167
|
+
// call this a find pattern, regardless of how many rest patterns are in
|
17168
|
+
// between because we know we already added the appropriate errors.
|
17169
|
+
// Otherwise we will create an array pattern.
|
17170
|
+
if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
|
17171
|
+
node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
|
17011
17172
|
} else {
|
17012
|
-
|
17013
|
-
|
17014
|
-
// need to add a syntax error.
|
17015
|
-
if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
|
17016
|
-
pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
|
17017
|
-
}
|
17173
|
+
node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
|
17174
|
+
}
|
17018
17175
|
|
17019
|
-
|
17020
|
-
|
17021
|
-
|
17022
|
-
|
17023
|
-
|
17176
|
+
xfree(nodes.nodes);
|
17177
|
+
} else if (leading_rest) {
|
17178
|
+
// Otherwise, if we parsed a single splat pattern, then we know we have an
|
17179
|
+
// array pattern, so we can go ahead and create that node.
|
17180
|
+
node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
|
17181
|
+
}
|
17024
17182
|
|
17025
|
-
|
17026
|
-
|
17027
|
-
current = (pm_node_t *) container;
|
17028
|
-
}
|
17183
|
+
return node;
|
17184
|
+
}
|
17029
17185
|
|
17030
|
-
|
17186
|
+
/**
|
17187
|
+
* Incorporate a negative sign into a numeric node by subtracting 1 character
|
17188
|
+
* from its start bounds. If it's a compound node, then we will recursively
|
17189
|
+
* apply this function to its value.
|
17190
|
+
*/
|
17191
|
+
static inline void
|
17192
|
+
parse_negative_numeric(pm_node_t *node) {
|
17193
|
+
switch (PM_NODE_TYPE(node)) {
|
17194
|
+
case PM_INTEGER_NODE: {
|
17195
|
+
pm_integer_node_t *cast = (pm_integer_node_t *) node;
|
17196
|
+
cast->base.location.start--;
|
17197
|
+
cast->value.negative = true;
|
17198
|
+
break;
|
17199
|
+
}
|
17200
|
+
case PM_FLOAT_NODE: {
|
17201
|
+
pm_float_node_t *cast = (pm_float_node_t *) node;
|
17202
|
+
cast->base.location.start--;
|
17203
|
+
cast->value = -cast->value;
|
17204
|
+
break;
|
17205
|
+
}
|
17206
|
+
case PM_RATIONAL_NODE: {
|
17207
|
+
pm_rational_node_t *cast = (pm_rational_node_t *) node;
|
17208
|
+
cast->base.location.start--;
|
17209
|
+
cast->numerator.negative = true;
|
17210
|
+
break;
|
17031
17211
|
}
|
17212
|
+
case PM_IMAGINARY_NODE:
|
17213
|
+
node->location.start--;
|
17214
|
+
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
|
17215
|
+
break;
|
17216
|
+
default:
|
17217
|
+
assert(false && "unreachable");
|
17218
|
+
break;
|
17032
17219
|
}
|
17033
|
-
|
17034
|
-
return current;
|
17035
17220
|
}
|
17036
17221
|
|
17037
17222
|
/**
|
@@ -17229,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17229
17414
|
}
|
17230
17415
|
}
|
17231
17416
|
|
17417
|
+
/**
|
17418
|
+
* This struct is used to pass information between the regular expression parser
|
17419
|
+
* and the error callback.
|
17420
|
+
*/
|
17421
|
+
typedef struct {
|
17422
|
+
/** The parser that we are parsing the regular expression for. */
|
17423
|
+
pm_parser_t *parser;
|
17424
|
+
|
17425
|
+
/** The start of the regular expression. */
|
17426
|
+
const uint8_t *start;
|
17427
|
+
|
17428
|
+
/** The end of the regular expression. */
|
17429
|
+
const uint8_t *end;
|
17430
|
+
|
17431
|
+
/**
|
17432
|
+
* Whether or not the source of the regular expression is shared. This
|
17433
|
+
* impacts the location of error messages, because if it is shared then we
|
17434
|
+
* can use the location directly and if it is not, then we use the bounds of
|
17435
|
+
* the regular expression itself.
|
17436
|
+
*/
|
17437
|
+
bool shared;
|
17438
|
+
} parse_regular_expression_error_data_t;
|
17439
|
+
|
17440
|
+
/**
|
17441
|
+
* This callback is called when the regular expression parser encounters a
|
17442
|
+
* syntax error.
|
17443
|
+
*/
|
17444
|
+
static void
|
17445
|
+
parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
|
17446
|
+
parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
|
17447
|
+
pm_location_t location;
|
17448
|
+
|
17449
|
+
if (callback_data->shared) {
|
17450
|
+
location = (pm_location_t) { .start = start, .end = end };
|
17451
|
+
} else {
|
17452
|
+
location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
|
17453
|
+
}
|
17454
|
+
|
17455
|
+
PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
|
17456
|
+
}
|
17457
|
+
|
17458
|
+
/**
|
17459
|
+
* Parse the errors for the regular expression and add them to the parser.
|
17460
|
+
*/
|
17461
|
+
static void
|
17462
|
+
parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
|
17463
|
+
const pm_string_t *unescaped = &node->unescaped;
|
17464
|
+
parse_regular_expression_error_data_t error_data = {
|
17465
|
+
.parser = parser,
|
17466
|
+
.start = node->base.location.start,
|
17467
|
+
.end = node->base.location.end,
|
17468
|
+
.shared = unescaped->type == PM_STRING_SHARED
|
17469
|
+
};
|
17470
|
+
|
17471
|
+
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
|
17472
|
+
}
|
17473
|
+
|
17232
17474
|
/**
|
17233
17475
|
* Parse an expression that begins with the previous node that we just lexed.
|
17234
17476
|
*/
|
@@ -17249,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17249
17491
|
break;
|
17250
17492
|
}
|
17251
17493
|
|
17252
|
-
|
17253
|
-
|
17494
|
+
// Ensure that we have a comma between elements in the array.
|
17495
|
+
if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
|
17496
|
+
const uint8_t *location = parser->previous.end;
|
17497
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17498
|
+
|
17499
|
+
parser->previous.start = location;
|
17500
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
17254
17501
|
}
|
17255
17502
|
|
17256
17503
|
// If we have a right bracket immediately following a comma,
|
@@ -17428,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17428
17675
|
|
17429
17676
|
// If we didn't find a terminator and we didn't find a right
|
17430
17677
|
// parenthesis, then this is a syntax error.
|
17431
|
-
if (!terminator_found) {
|
17678
|
+
if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
|
17432
17679
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17433
17680
|
}
|
17434
17681
|
|
@@ -17457,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17457
17704
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
|
17458
17705
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
17459
17706
|
break;
|
17460
|
-
} else {
|
17707
|
+
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
17708
|
+
// If we're at the end of the file, then we're going to add
|
17709
|
+
// an error after this for the ) anyway.
|
17461
17710
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
17462
17711
|
}
|
17463
17712
|
}
|
@@ -17676,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17676
17925
|
) {
|
17677
17926
|
pm_arguments_t arguments = { 0 };
|
17678
17927
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
17679
|
-
|
17680
17928
|
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
|
17929
|
+
|
17930
|
+
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
17931
|
+
// If we're about to convert an 'it' implicit local
|
17932
|
+
// variable read into a method call, we need to remove
|
17933
|
+
// it from the list of implicit local variables.
|
17934
|
+
parse_target_implicit_parameter(parser, node);
|
17935
|
+
} else {
|
17936
|
+
// Otherwise, we're about to convert a regular local
|
17937
|
+
// variable read into a method call, in which case we
|
17938
|
+
// need to indicate that this was not a read for the
|
17939
|
+
// purposes of warnings.
|
17940
|
+
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
|
17941
|
+
|
17942
|
+
if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
|
17943
|
+
parse_target_implicit_parameter(parser, node);
|
17944
|
+
} else {
|
17945
|
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
17946
|
+
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
|
17947
|
+
}
|
17948
|
+
}
|
17949
|
+
|
17681
17950
|
pm_node_destroy(parser, node);
|
17682
17951
|
return (pm_node_t *) fcall;
|
17683
17952
|
}
|
@@ -17685,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17685
17954
|
|
17686
17955
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17687
17956
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
17688
|
-
} else {
|
17689
|
-
// Check if `it` is not going to be assigned.
|
17690
|
-
switch (parser->current.type) {
|
17691
|
-
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
17692
|
-
case PM_TOKEN_AMPERSAND_EQUAL:
|
17693
|
-
case PM_TOKEN_CARET_EQUAL:
|
17694
|
-
case PM_TOKEN_EQUAL:
|
17695
|
-
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
17696
|
-
case PM_TOKEN_LESS_LESS_EQUAL:
|
17697
|
-
case PM_TOKEN_MINUS_EQUAL:
|
17698
|
-
case PM_TOKEN_PARENTHESIS_RIGHT:
|
17699
|
-
case PM_TOKEN_PERCENT_EQUAL:
|
17700
|
-
case PM_TOKEN_PIPE_EQUAL:
|
17701
|
-
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
17702
|
-
case PM_TOKEN_PLUS_EQUAL:
|
17703
|
-
case PM_TOKEN_SLASH_EQUAL:
|
17704
|
-
case PM_TOKEN_STAR_EQUAL:
|
17705
|
-
case PM_TOKEN_STAR_STAR_EQUAL:
|
17706
|
-
break;
|
17707
|
-
default:
|
17708
|
-
// Once we know it's neither a method call nor an
|
17709
|
-
// assignment, we can finally create `it` default
|
17710
|
-
// parameter.
|
17711
|
-
node = pm_node_check_it(parser, node);
|
17712
|
-
}
|
17713
17957
|
}
|
17714
17958
|
|
17715
17959
|
return node;
|
@@ -17970,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17970
18214
|
// as frozen because when clause strings are frozen.
|
17971
18215
|
if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
|
17972
18216
|
pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
|
18217
|
+
} else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
|
18218
|
+
pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
|
17973
18219
|
}
|
17974
18220
|
|
17975
18221
|
pm_when_clause_static_literals_add(parser, &literals, condition);
|
@@ -18375,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18375
18621
|
|
18376
18622
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
18377
18623
|
receiver = parse_variable_call(parser);
|
18378
|
-
receiver = pm_node_check_it(parser, receiver);
|
18379
18624
|
|
18380
18625
|
pm_parser_scope_push(parser, true);
|
18381
18626
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
@@ -18712,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18712
18957
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18713
18958
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18714
18959
|
} else {
|
18715
|
-
index = parse_target(parser, index, false);
|
18960
|
+
index = parse_target(parser, index, false, false);
|
18716
18961
|
}
|
18717
18962
|
|
18718
18963
|
context_pop(parser);
|
@@ -19347,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19347
19592
|
bool ascii_only = parser->current_regular_expression_ascii_only;
|
19348
19593
|
parser_lex(parser);
|
19349
19594
|
|
19350
|
-
// If we hit an end, then we can create a regular expression
|
19351
|
-
// without interpolation, which can be represented more
|
19352
|
-
// more easily compiled.
|
19595
|
+
// If we hit an end, then we can create a regular expression
|
19596
|
+
// node without interpolation, which can be represented more
|
19597
|
+
// succinctly and more easily compiled.
|
19353
19598
|
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
|
19354
|
-
|
19355
|
-
|
19356
|
-
|
19599
|
+
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
19600
|
+
|
19601
|
+
// If we're not immediately followed by a =~, then we want
|
19602
|
+
// to parse all of the errors at this point. If it is
|
19603
|
+
// followed by a =~, then it will get parsed higher up while
|
19604
|
+
// parsing the named captures as well.
|
19605
|
+
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
|
19606
|
+
parse_regular_expression_errors(parser, node);
|
19607
|
+
}
|
19608
|
+
|
19609
|
+
pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
|
19610
|
+
return (pm_node_t *) node;
|
19357
19611
|
}
|
19358
19612
|
|
19359
19613
|
// If we get here, then we have interpolation so we'll need to create
|
@@ -19571,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19571
19825
|
|
19572
19826
|
switch (parser->current.type) {
|
19573
19827
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
19574
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19575
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19576
|
-
|
19577
19828
|
pm_token_t opening = parser->current;
|
19578
19829
|
parser_lex(parser);
|
19579
19830
|
|
@@ -19590,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19590
19841
|
break;
|
19591
19842
|
}
|
19592
19843
|
case PM_CASE_PARAMETER: {
|
19593
|
-
assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
|
19594
|
-
parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
|
19595
|
-
|
19596
19844
|
pm_accepts_block_stack_push(parser, false);
|
19597
19845
|
pm_token_t opening = not_provided(parser);
|
19598
19846
|
block_parameters = parse_block_parameters(parser, false, &opening, true);
|
@@ -19845,89 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19845
20093
|
}
|
19846
20094
|
|
19847
20095
|
/**
|
19848
|
-
*
|
19849
|
-
*
|
20096
|
+
* This struct is used to pass information between the regular expression parser
|
20097
|
+
* and the named capture callback.
|
19850
20098
|
*/
|
19851
|
-
|
19852
|
-
|
19853
|
-
|
19854
|
-
pm_node_t *result;
|
20099
|
+
typedef struct {
|
20100
|
+
/** The parser that is parsing the regular expression. */
|
20101
|
+
pm_parser_t *parser;
|
19855
20102
|
|
19856
|
-
|
19857
|
-
|
19858
|
-
// are invalid, creating a MatchWriteNode is delaid here.
|
19859
|
-
pm_match_write_node_t *match = NULL;
|
19860
|
-
pm_constant_id_list_t names = { 0 };
|
20103
|
+
/** The call node wrapping the regular expression node. */
|
20104
|
+
pm_call_node_t *call;
|
19861
20105
|
|
19862
|
-
|
19863
|
-
|
20106
|
+
/** The match write node that is being created. */
|
20107
|
+
pm_match_write_node_t *match;
|
19864
20108
|
|
19865
|
-
|
19866
|
-
|
20109
|
+
/** The list of names that have been parsed. */
|
20110
|
+
pm_constant_id_list_t names;
|
19867
20111
|
|
19868
|
-
|
19869
|
-
|
20112
|
+
/**
|
20113
|
+
* Whether the content of the regular expression is shared. This impacts
|
20114
|
+
* whether or not we used owned constants or shared constants in the
|
20115
|
+
* constant pool for the names of the captures.
|
20116
|
+
*/
|
20117
|
+
bool shared;
|
20118
|
+
} parse_regular_expression_named_capture_data_t;
|
19870
20119
|
|
19871
|
-
|
19872
|
-
|
19873
|
-
|
20120
|
+
/**
|
20121
|
+
* This callback is called when the regular expression parser encounters a named
|
20122
|
+
* capture group.
|
20123
|
+
*/
|
20124
|
+
static void
|
20125
|
+
parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
20126
|
+
parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
|
19874
20127
|
|
19875
|
-
|
19876
|
-
|
19877
|
-
|
19878
|
-
location = (pm_location_t) { .start = source, .end = source + length };
|
19879
|
-
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
19880
|
-
} else {
|
19881
|
-
// Otherwise, the name is a slice of the malloc-ed owned string,
|
19882
|
-
// in which case we need to copy it out into a new string.
|
19883
|
-
location = call->receiver->location;
|
20128
|
+
pm_parser_t *parser = callback_data->parser;
|
20129
|
+
pm_call_node_t *call = callback_data->call;
|
20130
|
+
pm_constant_id_list_t *names = &callback_data->names;
|
19884
20131
|
|
19885
|
-
|
19886
|
-
|
20132
|
+
const uint8_t *source = pm_string_source(capture);
|
20133
|
+
size_t length = pm_string_length(capture);
|
19887
20134
|
|
19888
|
-
|
19889
|
-
|
19890
|
-
}
|
20135
|
+
pm_location_t location;
|
20136
|
+
pm_constant_id_t name;
|
19891
20137
|
|
19892
|
-
|
19893
|
-
|
19894
|
-
|
19895
|
-
if (pm_constant_id_list_includes(&names, name)) continue;
|
19896
|
-
pm_constant_id_list_append(&names, name);
|
20138
|
+
// If the name of the capture group isn't a valid identifier, we do
|
20139
|
+
// not add it to the local table.
|
20140
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) return;
|
19897
20141
|
|
19898
|
-
|
19899
|
-
|
19900
|
-
|
19901
|
-
|
19902
|
-
|
20142
|
+
if (callback_data->shared) {
|
20143
|
+
// If the unescaped string is a slice of the source, then we can
|
20144
|
+
// copy the names directly. The pointers will line up.
|
20145
|
+
location = (pm_location_t) { .start = source, .end = source + length };
|
20146
|
+
name = pm_parser_constant_id_location(parser, location.start, location.end);
|
20147
|
+
} else {
|
20148
|
+
// Otherwise, the name is a slice of the malloc-ed owned string,
|
20149
|
+
// in which case we need to copy it out into a new string.
|
20150
|
+
location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
|
19903
20151
|
|
19904
|
-
|
19905
|
-
|
20152
|
+
void *memory = xmalloc(length);
|
20153
|
+
if (memory == NULL) abort();
|
19906
20154
|
|
19907
|
-
|
19908
|
-
|
19909
|
-
|
20155
|
+
memcpy(memory, source, length);
|
20156
|
+
name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
|
20157
|
+
}
|
19910
20158
|
|
19911
|
-
|
19912
|
-
|
19913
|
-
|
19914
|
-
|
19915
|
-
|
20159
|
+
// Add this name to the list of constants if it is valid, not duplicated,
|
20160
|
+
// and not a keyword.
|
20161
|
+
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
|
20162
|
+
pm_constant_id_list_append(names, name);
|
20163
|
+
|
20164
|
+
int depth;
|
20165
|
+
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20166
|
+
// If the local is not already a local but it is a keyword, then we
|
20167
|
+
// do not want to add a capture for this.
|
20168
|
+
if (pm_local_is_keyword((const char *) source, length)) return;
|
20169
|
+
|
20170
|
+
// If the identifier is not already a local, then we will add it to
|
20171
|
+
// the local table.
|
20172
|
+
pm_parser_local_add(parser, name, location.start, location.end, 0);
|
19916
20173
|
}
|
19917
20174
|
|
19918
|
-
|
19919
|
-
|
19920
|
-
|
19921
|
-
|
20175
|
+
// Here we lazily create the MatchWriteNode since we know we're
|
20176
|
+
// about to add a target.
|
20177
|
+
if (callback_data->match == NULL) {
|
20178
|
+
callback_data->match = pm_match_write_node_create(parser, call);
|
19922
20179
|
}
|
19923
20180
|
|
19924
|
-
|
19925
|
-
|
19926
|
-
|
20181
|
+
// Next, create the local variable target and add it to the list of
|
20182
|
+
// targets for the match.
|
20183
|
+
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20184
|
+
pm_node_list_append(&callback_data->match->targets, target);
|
19927
20185
|
}
|
20186
|
+
}
|
19928
20187
|
|
19929
|
-
|
19930
|
-
|
20188
|
+
/**
|
20189
|
+
* Potentially change a =~ with a regular expression with named captures into a
|
20190
|
+
* match write node.
|
20191
|
+
*/
|
20192
|
+
static pm_node_t *
|
20193
|
+
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
20194
|
+
parse_regular_expression_named_capture_data_t callback_data = {
|
20195
|
+
.parser = parser,
|
20196
|
+
.call = call,
|
20197
|
+
.names = { 0 },
|
20198
|
+
.shared = content->type == PM_STRING_SHARED
|
20199
|
+
};
|
20200
|
+
|
20201
|
+
parse_regular_expression_error_data_t error_data = {
|
20202
|
+
.parser = parser,
|
20203
|
+
.start = call->receiver->location.start,
|
20204
|
+
.end = call->receiver->location.end,
|
20205
|
+
.shared = content->type == PM_STRING_SHARED
|
20206
|
+
};
|
20207
|
+
|
20208
|
+
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
20209
|
+
pm_constant_id_list_free(&callback_data.names);
|
20210
|
+
|
20211
|
+
if (callback_data.match != NULL) {
|
20212
|
+
return (pm_node_t *) callback_data.match;
|
20213
|
+
} else {
|
20214
|
+
return (pm_node_t *) call;
|
20215
|
+
}
|
19931
20216
|
}
|
19932
20217
|
|
19933
20218
|
static inline pm_node_t *
|
@@ -20044,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20044
20329
|
return result;
|
20045
20330
|
}
|
20046
20331
|
case PM_CALL_NODE: {
|
20047
|
-
parser_lex(parser);
|
20048
20332
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20049
20333
|
|
20050
20334
|
// If we have a vcall (a method with no arguments and no
|
@@ -20055,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20055
20339
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20056
20340
|
|
20057
20341
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20342
|
+
parser_lex(parser);
|
20343
|
+
|
20058
20344
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
|
20059
20345
|
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20060
20346
|
|
@@ -20062,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20062
20348
|
return result;
|
20063
20349
|
}
|
20064
20350
|
|
20351
|
+
// Move past the token here so that we have already added
|
20352
|
+
// the local variable by this point.
|
20353
|
+
parser_lex(parser);
|
20354
|
+
|
20065
20355
|
// If there is no call operator and the message is "[]" then
|
20066
20356
|
// this is an aref expression, and we can transform it into
|
20067
20357
|
// an aset expression.
|
@@ -20157,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20157
20447
|
return result;
|
20158
20448
|
}
|
20159
20449
|
case PM_CALL_NODE: {
|
20160
|
-
parser_lex(parser);
|
20161
20450
|
pm_call_node_t *cast = (pm_call_node_t *) node;
|
20162
20451
|
|
20163
20452
|
// If we have a vcall (a method with no arguments and no
|
@@ -20168,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20168
20457
|
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
|
20169
20458
|
|
20170
20459
|
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
|
20460
|
+
parser_lex(parser);
|
20461
|
+
|
20171
20462
|
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
|
20172
20463
|
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
|
20173
20464
|
|
@@ -20175,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20175
20466
|
return result;
|
20176
20467
|
}
|
20177
20468
|
|
20469
|
+
// Move past the token here so that we have already added
|
20470
|
+
// the local variable by this point.
|
20471
|
+
parser_lex(parser);
|
20472
|
+
|
20178
20473
|
// If there is no call operator and the message is "[]" then
|
20179
20474
|
// this is an aref expression, and we can transform it into
|
20180
20475
|
// an aset expression.
|
@@ -20584,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20584
20879
|
|
20585
20880
|
if (
|
20586
20881
|
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
|
20587
|
-
(token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
|
20882
|
+
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
|
20588
20883
|
) {
|
20589
20884
|
// If we have a constant immediately following a '::' operator, then
|
20590
20885
|
// this can either be a constant path or a method call, depending on
|
@@ -21127,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
21127
21422
|
|
21128
21423
|
// Scopes given from the outside are not allowed to have numbered
|
21129
21424
|
// parameters.
|
21130
|
-
parser->current_scope->
|
21425
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
21131
21426
|
|
21132
21427
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
21133
21428
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -21515,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
21515
21810
|
}
|
21516
21811
|
|
21517
21812
|
#endif
|
21518
|
-
|
21519
|
-
/** An error that is going to be formatted into the output. */
|
21520
|
-
typedef struct {
|
21521
|
-
/** A pointer to the diagnostic that was generated during parsing. */
|
21522
|
-
pm_diagnostic_t *error;
|
21523
|
-
|
21524
|
-
/** The start line of the diagnostic message. */
|
21525
|
-
int32_t line;
|
21526
|
-
|
21527
|
-
/** The column start of the diagnostic message. */
|
21528
|
-
uint32_t column_start;
|
21529
|
-
|
21530
|
-
/** The column end of the diagnostic message. */
|
21531
|
-
uint32_t column_end;
|
21532
|
-
} pm_error_t;
|
21533
|
-
|
21534
|
-
/** The format that will be used to format the errors into the output. */
|
21535
|
-
typedef struct {
|
21536
|
-
/** The prefix that will be used for line numbers. */
|
21537
|
-
const char *number_prefix;
|
21538
|
-
|
21539
|
-
/** The prefix that will be used for blank lines. */
|
21540
|
-
const char *blank_prefix;
|
21541
|
-
|
21542
|
-
/** The divider that will be used between sections of source code. */
|
21543
|
-
const char *divider;
|
21544
|
-
|
21545
|
-
/** The length of the blank prefix. */
|
21546
|
-
size_t blank_prefix_length;
|
21547
|
-
|
21548
|
-
/** The length of the divider. */
|
21549
|
-
size_t divider_length;
|
21550
|
-
} pm_error_format_t;
|
21551
|
-
|
21552
|
-
#define PM_COLOR_GRAY "\033[38;5;102m"
|
21553
|
-
#define PM_COLOR_RED "\033[1;31m"
|
21554
|
-
#define PM_COLOR_RESET "\033[m"
|
21555
|
-
|
21556
|
-
static inline pm_error_t *
|
21557
|
-
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
21558
|
-
pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
|
21559
|
-
if (errors == NULL) return NULL;
|
21560
|
-
|
21561
|
-
int32_t start_line = parser->start_line;
|
21562
|
-
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
21563
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
21564
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
21565
|
-
|
21566
|
-
// We're going to insert this error into the array in sorted order. We
|
21567
|
-
// do this by finding the first error that has a line number greater
|
21568
|
-
// than the current error and then inserting the current error before
|
21569
|
-
// that one.
|
21570
|
-
size_t index = 0;
|
21571
|
-
while (
|
21572
|
-
(index < error_list->size) &&
|
21573
|
-
(errors[index].error != NULL) &&
|
21574
|
-
(
|
21575
|
-
(errors[index].line < start.line) ||
|
21576
|
-
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
21577
|
-
)
|
21578
|
-
) index++;
|
21579
|
-
|
21580
|
-
// Now we're going to shift all of the errors after this one down one
|
21581
|
-
// index to make room for the new error.
|
21582
|
-
if (index + 1 < error_list->size) {
|
21583
|
-
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
21584
|
-
}
|
21585
|
-
|
21586
|
-
// Finally, we'll insert the error into the array.
|
21587
|
-
uint32_t column_end;
|
21588
|
-
if (start.line == end.line) {
|
21589
|
-
column_end = end.column;
|
21590
|
-
} else {
|
21591
|
-
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
21592
|
-
}
|
21593
|
-
|
21594
|
-
// Ensure we have at least one column of error.
|
21595
|
-
if (start.column == column_end) column_end++;
|
21596
|
-
|
21597
|
-
errors[index] = (pm_error_t) {
|
21598
|
-
.error = error,
|
21599
|
-
.line = start.line,
|
21600
|
-
.column_start = start.column,
|
21601
|
-
.column_end = column_end
|
21602
|
-
};
|
21603
|
-
}
|
21604
|
-
|
21605
|
-
return errors;
|
21606
|
-
}
|
21607
|
-
|
21608
|
-
static inline void
|
21609
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
21610
|
-
int32_t line_delta = line - parser->start_line;
|
21611
|
-
assert(line_delta >= 0);
|
21612
|
-
|
21613
|
-
size_t index = (size_t) line_delta;
|
21614
|
-
assert(index < newline_list->size);
|
21615
|
-
|
21616
|
-
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
21617
|
-
const uint8_t *end;
|
21618
|
-
|
21619
|
-
if (index >= newline_list->size - 1) {
|
21620
|
-
end = parser->end;
|
21621
|
-
} else {
|
21622
|
-
end = &parser->start[newline_list->offsets[index + 1]];
|
21623
|
-
}
|
21624
|
-
|
21625
|
-
pm_buffer_append_format(buffer, number_prefix, line);
|
21626
|
-
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
21627
|
-
|
21628
|
-
if (end == parser->end && end[-1] != '\n') {
|
21629
|
-
pm_buffer_append_string(buffer, "\n", 1);
|
21630
|
-
}
|
21631
|
-
}
|
21632
|
-
|
21633
|
-
/**
|
21634
|
-
* Format the errors on the parser into the given buffer.
|
21635
|
-
*/
|
21636
|
-
PRISM_EXPORTED_FUNCTION void
|
21637
|
-
pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
|
21638
|
-
assert(error_list->size != 0);
|
21639
|
-
|
21640
|
-
// First, we're going to sort all of the errors by line number using an
|
21641
|
-
// insertion sort into a newly allocated array.
|
21642
|
-
const int32_t start_line = parser->start_line;
|
21643
|
-
const pm_newline_list_t *newline_list = &parser->newline_list;
|
21644
|
-
|
21645
|
-
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
21646
|
-
if (errors == NULL) return;
|
21647
|
-
|
21648
|
-
// Now we're going to determine how we're going to format line numbers and
|
21649
|
-
// blank lines based on the maximum number of digits in the line numbers
|
21650
|
-
// that are going to be displaid.
|
21651
|
-
pm_error_format_t error_format;
|
21652
|
-
int32_t first_line_number = errors[0].line;
|
21653
|
-
int32_t last_line_number = errors[error_list->size - 1].line;
|
21654
|
-
|
21655
|
-
// If we have a maximum line number that is negative, then we're going to
|
21656
|
-
// use the absolute value for comparison but multiple by 10 to additionally
|
21657
|
-
// have a column for the negative sign.
|
21658
|
-
if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
|
21659
|
-
if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
|
21660
|
-
int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
|
21661
|
-
|
21662
|
-
if (max_line_number < 10) {
|
21663
|
-
if (colorize) {
|
21664
|
-
error_format = (pm_error_format_t) {
|
21665
|
-
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
21666
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21667
|
-
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
21668
|
-
};
|
21669
|
-
} else {
|
21670
|
-
error_format = (pm_error_format_t) {
|
21671
|
-
.number_prefix = "%1" PRIi32 " | ",
|
21672
|
-
.blank_prefix = " | ",
|
21673
|
-
.divider = " ~~~~~\n"
|
21674
|
-
};
|
21675
|
-
}
|
21676
|
-
} else if (max_line_number < 100) {
|
21677
|
-
if (colorize) {
|
21678
|
-
error_format = (pm_error_format_t) {
|
21679
|
-
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
21680
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21681
|
-
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
21682
|
-
};
|
21683
|
-
} else {
|
21684
|
-
error_format = (pm_error_format_t) {
|
21685
|
-
.number_prefix = "%2" PRIi32 " | ",
|
21686
|
-
.blank_prefix = " | ",
|
21687
|
-
.divider = " ~~~~~~\n"
|
21688
|
-
};
|
21689
|
-
}
|
21690
|
-
} else if (max_line_number < 1000) {
|
21691
|
-
if (colorize) {
|
21692
|
-
error_format = (pm_error_format_t) {
|
21693
|
-
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
21694
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21695
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
21696
|
-
};
|
21697
|
-
} else {
|
21698
|
-
error_format = (pm_error_format_t) {
|
21699
|
-
.number_prefix = "%3" PRIi32 " | ",
|
21700
|
-
.blank_prefix = " | ",
|
21701
|
-
.divider = " ~~~~~~~\n"
|
21702
|
-
};
|
21703
|
-
}
|
21704
|
-
} else if (max_line_number < 10000) {
|
21705
|
-
if (colorize) {
|
21706
|
-
error_format = (pm_error_format_t) {
|
21707
|
-
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
21708
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21709
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21710
|
-
};
|
21711
|
-
} else {
|
21712
|
-
error_format = (pm_error_format_t) {
|
21713
|
-
.number_prefix = "%4" PRIi32 " | ",
|
21714
|
-
.blank_prefix = " | ",
|
21715
|
-
.divider = " ~~~~~~~~\n"
|
21716
|
-
};
|
21717
|
-
}
|
21718
|
-
} else {
|
21719
|
-
if (colorize) {
|
21720
|
-
error_format = (pm_error_format_t) {
|
21721
|
-
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
21722
|
-
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
21723
|
-
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
21724
|
-
};
|
21725
|
-
} else {
|
21726
|
-
error_format = (pm_error_format_t) {
|
21727
|
-
.number_prefix = "%5" PRIi32 " | ",
|
21728
|
-
.blank_prefix = " | ",
|
21729
|
-
.divider = " ~~~~~~~~\n"
|
21730
|
-
};
|
21731
|
-
}
|
21732
|
-
}
|
21733
|
-
|
21734
|
-
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
21735
|
-
error_format.divider_length = strlen(error_format.divider);
|
21736
|
-
|
21737
|
-
// Now we're going to iterate through every error in our error list and
|
21738
|
-
// display it. While we're iterating, we will display some padding lines of
|
21739
|
-
// the source before the error to give some context. We'll be careful not to
|
21740
|
-
// display the same line twice in case the errors are close enough in the
|
21741
|
-
// source.
|
21742
|
-
int32_t last_line = parser->start_line - 1;
|
21743
|
-
const pm_encoding_t *encoding = parser->encoding;
|
21744
|
-
|
21745
|
-
for (size_t index = 0; index < error_list->size; index++) {
|
21746
|
-
pm_error_t *error = &errors[index];
|
21747
|
-
|
21748
|
-
// Here we determine how many lines of padding of the source to display,
|
21749
|
-
// based on the difference from the last line that was displaid.
|
21750
|
-
if (error->line - last_line > 1) {
|
21751
|
-
if (error->line - last_line > 2) {
|
21752
|
-
if ((index != 0) && (error->line - last_line > 3)) {
|
21753
|
-
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
21754
|
-
}
|
21755
|
-
|
21756
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21757
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
21758
|
-
}
|
21759
|
-
|
21760
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21761
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
21762
|
-
}
|
21763
|
-
|
21764
|
-
// If this is the first error or we're on a new line, then we'll display
|
21765
|
-
// the line that has the error in it.
|
21766
|
-
if ((index == 0) || (error->line != last_line)) {
|
21767
|
-
if (colorize) {
|
21768
|
-
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
|
21769
|
-
} else {
|
21770
|
-
pm_buffer_append_string(buffer, "> ", 2);
|
21771
|
-
}
|
21772
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
21773
|
-
}
|
21774
|
-
|
21775
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
21776
|
-
if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
|
21777
|
-
|
21778
|
-
// Now we'll display the actual error message. We'll do this by first
|
21779
|
-
// putting the prefix to the line, then a bunch of blank spaces
|
21780
|
-
// depending on the column, then as many carets as we need to display
|
21781
|
-
// the width of the error, then the error message itself.
|
21782
|
-
//
|
21783
|
-
// Note that this doesn't take into account the width of the actual
|
21784
|
-
// character when displaid in the terminal. For some east-asian
|
21785
|
-
// languages or emoji, this means it can be thrown off pretty badly. We
|
21786
|
-
// will need to solve this eventually.
|
21787
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21788
|
-
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21789
|
-
|
21790
|
-
size_t column = 0;
|
21791
|
-
while (column < error->column_start) {
|
21792
|
-
pm_buffer_append_byte(buffer, ' ');
|
21793
|
-
|
21794
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21795
|
-
column += (char_width == 0 ? 1 : char_width);
|
21796
|
-
}
|
21797
|
-
|
21798
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21799
|
-
pm_buffer_append_byte(buffer, '^');
|
21800
|
-
|
21801
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21802
|
-
column += (char_width == 0 ? 1 : char_width);
|
21803
|
-
|
21804
|
-
while (column < error->column_end) {
|
21805
|
-
pm_buffer_append_byte(buffer, '~');
|
21806
|
-
|
21807
|
-
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21808
|
-
column += (char_width == 0 ? 1 : char_width);
|
21809
|
-
}
|
21810
|
-
|
21811
|
-
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21812
|
-
|
21813
|
-
if (inline_messages) {
|
21814
|
-
pm_buffer_append_byte(buffer, ' ');
|
21815
|
-
assert(error->error != NULL);
|
21816
|
-
|
21817
|
-
const char *message = error->error->message;
|
21818
|
-
pm_buffer_append_string(buffer, message, strlen(message));
|
21819
|
-
}
|
21820
|
-
|
21821
|
-
pm_buffer_append_byte(buffer, '\n');
|
21822
|
-
|
21823
|
-
// Here we determine how many lines of padding to display after the
|
21824
|
-
// error, depending on where the next error is in source.
|
21825
|
-
last_line = error->line;
|
21826
|
-
int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
|
21827
|
-
|
21828
|
-
if (next_line - last_line > 1) {
|
21829
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21830
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21831
|
-
}
|
21832
|
-
|
21833
|
-
if (next_line - last_line > 1) {
|
21834
|
-
pm_buffer_append_string(buffer, " ", 2);
|
21835
|
-
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
21836
|
-
}
|
21837
|
-
}
|
21838
|
-
|
21839
|
-
// Finally, we'll free the array of errors that we allocated.
|
21840
|
-
xfree(errors);
|
21841
|
-
}
|
21842
|
-
|
21843
|
-
#undef PM_COLOR_GRAY
|
21844
|
-
#undef PM_COLOR_RED
|
21845
|
-
#undef PM_COLOR_RESET
|