prism 0.22.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +1982 -1538
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_newline_list.h +4 -3
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +103 -77
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node.rb +3624 -2114
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +56 -19
- data/lib/prism/serialize.rb +605 -303
- data/lib/prism/translation/parser/compiler.rb +1 -1
- data/lib/prism/translation/parser/rubocop.rb +11 -3
- data/lib/prism/translation/parser.rb +25 -12
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +6 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prettyprint.c +3 -3
- data/src/prism.c +172 -97
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_newline_list.c +6 -3
- data/src/util/pm_strpbrk.c +122 -14
- metadata +8 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/src/prism.c
CHANGED
@@ -51,6 +51,7 @@ debug_context(pm_context_t context) {
|
|
51
51
|
case PM_CONTEXT_IF: return "IF";
|
52
52
|
case PM_CONTEXT_MAIN: return "MAIN";
|
53
53
|
case PM_CONTEXT_MODULE: return "MODULE";
|
54
|
+
case PM_CONTEXT_NONE: return "NONE";
|
54
55
|
case PM_CONTEXT_PARENS: return "PARENS";
|
55
56
|
case PM_CONTEXT_POSTEXE: return "POSTEXE";
|
56
57
|
case PM_CONTEXT_PREDICATE: return "PREDICATE";
|
@@ -492,7 +493,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
492
493
|
/**
|
493
494
|
* Append an error to the list of errors on the parser using a format string.
|
494
495
|
*/
|
495
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
496
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
497
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
496
498
|
|
497
499
|
/**
|
498
500
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -507,7 +509,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
507
509
|
* Append an error to the list of errors on the parser using the given location
|
508
510
|
* using a format string.
|
509
511
|
*/
|
510
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
512
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
513
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
511
514
|
|
512
515
|
/**
|
513
516
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -522,7 +525,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
522
525
|
* Append an error to the list of errors on the parser using the location of the
|
523
526
|
* given node and a format string.
|
524
527
|
*/
|
525
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
528
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
529
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
530
|
+
|
531
|
+
/**
|
532
|
+
* Append an error to the list of errors on the parser using the location of the
|
533
|
+
* given node and a format string, and add on the content of the node.
|
534
|
+
*/
|
535
|
+
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
|
536
|
+
PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
|
526
537
|
|
527
538
|
/**
|
528
539
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -546,7 +557,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
546
557
|
* Append an error to the list of errors on the parser using the location of the
|
547
558
|
* given token and a format string.
|
548
559
|
*/
|
549
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
560
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
561
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
562
|
+
|
563
|
+
/**
|
564
|
+
* Append an error to the list of errors on the parser using the location of the
|
565
|
+
* given token and a format string, and add on the content of the token.
|
566
|
+
*/
|
567
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
|
568
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
|
550
569
|
|
551
570
|
/**
|
552
571
|
* Append a warning to the list of warnings on the parser.
|
@@ -2890,7 +2909,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
2890
2909
|
static pm_def_node_t *
|
2891
2910
|
pm_def_node_create(
|
2892
2911
|
pm_parser_t *parser,
|
2893
|
-
|
2912
|
+
pm_constant_id_t name,
|
2913
|
+
const pm_token_t *name_loc,
|
2894
2914
|
pm_node_t *receiver,
|
2895
2915
|
pm_parameters_node_t *parameters,
|
2896
2916
|
pm_node_t *body,
|
@@ -2920,8 +2940,8 @@ pm_def_node_create(
|
|
2920
2940
|
.type = PM_DEF_NODE,
|
2921
2941
|
.location = { .start = def_keyword->start, .end = end },
|
2922
2942
|
},
|
2923
|
-
.name =
|
2924
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2943
|
+
.name = name,
|
2944
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2925
2945
|
.receiver = receiver,
|
2926
2946
|
.parameters = parameters,
|
2927
2947
|
.body = body,
|
@@ -4642,13 +4662,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4642
4662
|
*/
|
4643
4663
|
static void
|
4644
4664
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4645
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4665
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4646
4666
|
if (node->rest == NULL) {
|
4647
4667
|
node->rest = target;
|
4648
4668
|
} else {
|
4649
4669
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4650
4670
|
pm_node_list_append(&node->rights, target);
|
4651
4671
|
}
|
4672
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4673
|
+
if (node->rest == NULL) {
|
4674
|
+
node->rest = target;
|
4675
|
+
} else {
|
4676
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4677
|
+
pm_node_list_append(&node->rights, target);
|
4678
|
+
}
|
4652
4679
|
} else if (node->rest == NULL) {
|
4653
4680
|
pm_node_list_append(&node->lefts, target);
|
4654
4681
|
} else {
|
@@ -7172,7 +7199,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
7172
7199
|
static pm_token_type_t
|
7173
7200
|
lex_global_variable(pm_parser_t *parser) {
|
7174
7201
|
if (parser->current.end >= parser->end) {
|
7175
|
-
|
7202
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7176
7203
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
7177
7204
|
}
|
7178
7205
|
|
@@ -7213,7 +7240,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7213
7240
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
7214
7241
|
|
7215
7242
|
// $0 isn't allowed to be followed by anything.
|
7216
|
-
|
7243
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7217
7244
|
}
|
7218
7245
|
|
7219
7246
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7244,7 +7271,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7244
7271
|
} else {
|
7245
7272
|
// If we get here, then we have a $ followed by something that isn't
|
7246
7273
|
// recognized as a global variable.
|
7247
|
-
|
7274
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7248
7275
|
}
|
7249
7276
|
|
7250
7277
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -8148,10 +8175,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
8148
8175
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
8149
8176
|
parser->current.end += width;
|
8150
8177
|
}
|
8151
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
8152
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
8153
8178
|
} else {
|
8154
|
-
|
8179
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8180
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8181
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8155
8182
|
}
|
8156
8183
|
|
8157
8184
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -9711,7 +9738,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9711
9738
|
// and then find the first one.
|
9712
9739
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9713
9740
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9714
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9741
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9715
9742
|
|
9716
9743
|
// If we haven't found an escape yet, then this buffer will be
|
9717
9744
|
// unallocated since we can refer directly to the source string.
|
@@ -9720,7 +9747,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9720
9747
|
while (breakpoint != NULL) {
|
9721
9748
|
// If we hit a null byte, skip directly past it.
|
9722
9749
|
if (*breakpoint == '\0') {
|
9723
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9750
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9724
9751
|
continue;
|
9725
9752
|
}
|
9726
9753
|
|
@@ -9739,7 +9766,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9739
9766
|
// we need to continue on past it.
|
9740
9767
|
if (lex_mode->as.list.nesting > 0) {
|
9741
9768
|
parser->current.end = breakpoint + 1;
|
9742
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9769
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9743
9770
|
lex_mode->as.list.nesting--;
|
9744
9771
|
continue;
|
9745
9772
|
}
|
@@ -9824,7 +9851,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9824
9851
|
}
|
9825
9852
|
|
9826
9853
|
token_buffer.cursor = parser->current.end;
|
9827
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9854
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9828
9855
|
continue;
|
9829
9856
|
}
|
9830
9857
|
|
@@ -9837,7 +9864,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9837
9864
|
// that looked like an interpolated class or instance variable
|
9838
9865
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9839
9866
|
// to the next breakpoint.
|
9840
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9867
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9841
9868
|
continue;
|
9842
9869
|
}
|
9843
9870
|
|
@@ -9852,7 +9879,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9852
9879
|
// and find the next breakpoint.
|
9853
9880
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9854
9881
|
parser->current.end = breakpoint + 1;
|
9855
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9882
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9856
9883
|
lex_mode->as.list.nesting++;
|
9857
9884
|
continue;
|
9858
9885
|
}
|
@@ -9891,14 +9918,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9891
9918
|
// regular expression. We'll use strpbrk to find the first of these
|
9892
9919
|
// characters.
|
9893
9920
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9894
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9921
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9895
9922
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9896
9923
|
|
9897
9924
|
while (breakpoint != NULL) {
|
9898
9925
|
// If we hit a null byte, skip directly past it.
|
9899
9926
|
if (*breakpoint == '\0') {
|
9900
9927
|
parser->current.end = breakpoint + 1;
|
9901
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9928
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9902
9929
|
continue;
|
9903
9930
|
}
|
9904
9931
|
|
@@ -9920,7 +9947,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9920
9947
|
// If the terminator is not a newline, then we can set
|
9921
9948
|
// the next breakpoint and continue.
|
9922
9949
|
parser->current.end = breakpoint + 1;
|
9923
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9950
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9924
9951
|
continue;
|
9925
9952
|
}
|
9926
9953
|
}
|
@@ -9930,7 +9957,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9930
9957
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9931
9958
|
if (lex_mode->as.regexp.nesting > 0) {
|
9932
9959
|
parser->current.end = breakpoint + 1;
|
9933
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9960
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9934
9961
|
lex_mode->as.regexp.nesting--;
|
9935
9962
|
continue;
|
9936
9963
|
}
|
@@ -10029,7 +10056,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10029
10056
|
}
|
10030
10057
|
|
10031
10058
|
token_buffer.cursor = parser->current.end;
|
10032
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10059
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10033
10060
|
continue;
|
10034
10061
|
}
|
10035
10062
|
|
@@ -10042,7 +10069,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10042
10069
|
// something that looked like an interpolated class or
|
10043
10070
|
// instance variable like "#@" but wasn't actually. In
|
10044
10071
|
// this case we'll just skip to the next breakpoint.
|
10045
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10072
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10046
10073
|
continue;
|
10047
10074
|
}
|
10048
10075
|
|
@@ -10057,7 +10084,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10057
10084
|
// and find the next breakpoint.
|
10058
10085
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
10059
10086
|
parser->current.end = breakpoint + 1;
|
10060
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10087
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10061
10088
|
lex_mode->as.regexp.nesting++;
|
10062
10089
|
continue;
|
10063
10090
|
}
|
@@ -10093,7 +10120,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10093
10120
|
// string. We'll use strpbrk to find the first of these characters.
|
10094
10121
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10095
10122
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
10096
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10123
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10097
10124
|
|
10098
10125
|
// If we haven't found an escape yet, then this buffer will be
|
10099
10126
|
// unallocated since we can refer directly to the source string.
|
@@ -10105,7 +10132,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10132
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
10106
10133
|
lex_mode->as.string.nesting++;
|
10107
10134
|
parser->current.end = breakpoint + 1;
|
10108
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10135
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10109
10136
|
continue;
|
10110
10137
|
}
|
10111
10138
|
|
@@ -10117,7 +10144,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10117
10144
|
// to continue on past it.
|
10118
10145
|
if (lex_mode->as.string.nesting > 0) {
|
10119
10146
|
parser->current.end = breakpoint + 1;
|
10120
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10147
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10121
10148
|
lex_mode->as.string.nesting--;
|
10122
10149
|
continue;
|
10123
10150
|
}
|
@@ -10159,7 +10186,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10159
10186
|
if (parser->heredoc_end == NULL) {
|
10160
10187
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
10161
10188
|
parser->current.end = breakpoint + 1;
|
10162
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10189
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10163
10190
|
continue;
|
10164
10191
|
} else {
|
10165
10192
|
parser->current.end = breakpoint + 1;
|
@@ -10173,7 +10200,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10173
10200
|
case '\0':
|
10174
10201
|
// Skip directly past the null character.
|
10175
10202
|
parser->current.end = breakpoint + 1;
|
10176
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10203
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10177
10204
|
break;
|
10178
10205
|
case '\\': {
|
10179
10206
|
// Here we hit escapes.
|
@@ -10242,7 +10269,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10242
10269
|
}
|
10243
10270
|
|
10244
10271
|
token_buffer.cursor = parser->current.end;
|
10245
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10272
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10246
10273
|
break;
|
10247
10274
|
}
|
10248
10275
|
case '#': {
|
@@ -10253,7 +10280,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10253
10280
|
// looked like an interpolated class or instance variable like "#@"
|
10254
10281
|
// but wasn't actually. In this case we'll just skip to the next
|
10255
10282
|
// breakpoint.
|
10256
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10283
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10257
10284
|
break;
|
10258
10285
|
}
|
10259
10286
|
|
@@ -10381,7 +10408,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10381
10408
|
breakpoints[2] = '\0';
|
10382
10409
|
}
|
10383
10410
|
|
10384
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10411
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10385
10412
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
10386
10413
|
bool was_escaped_newline = false;
|
10387
10414
|
|
@@ -10390,7 +10417,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10390
10417
|
case '\0':
|
10391
10418
|
// Skip directly past the null character.
|
10392
10419
|
parser->current.end = breakpoint + 1;
|
10393
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10420
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10394
10421
|
break;
|
10395
10422
|
case '\n': {
|
10396
10423
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10465,7 +10492,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10465
10492
|
// Otherwise we hit a newline and it wasn't followed by
|
10466
10493
|
// a terminator, so we can continue parsing.
|
10467
10494
|
parser->current.end = breakpoint + 1;
|
10468
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10495
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10469
10496
|
break;
|
10470
10497
|
}
|
10471
10498
|
case '\\': {
|
@@ -10529,7 +10556,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10529
10556
|
}
|
10530
10557
|
|
10531
10558
|
token_buffer.cursor = parser->current.end;
|
10532
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10559
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10533
10560
|
break;
|
10534
10561
|
}
|
10535
10562
|
case '#': {
|
@@ -10541,7 +10568,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10541
10568
|
// or instance variable like "#@" but wasn't
|
10542
10569
|
// actually. In this case we'll just skip to the
|
10543
10570
|
// next breakpoint.
|
10544
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10571
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10545
10572
|
break;
|
10546
10573
|
}
|
10547
10574
|
|
@@ -11054,7 +11081,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
11054
11081
|
return target;
|
11055
11082
|
case PM_BACK_REFERENCE_READ_NODE:
|
11056
11083
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11057
|
-
|
11084
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11058
11085
|
return target;
|
11059
11086
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
11060
11087
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -11192,7 +11219,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
11192
11219
|
}
|
11193
11220
|
case PM_BACK_REFERENCE_READ_NODE:
|
11194
11221
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11195
|
-
|
11222
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11196
11223
|
/* fallthrough */
|
11197
11224
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11198
11225
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -11367,7 +11394,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
11367
11394
|
pm_multi_target_node_targets_append(parser, result, target);
|
11368
11395
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
11369
11396
|
// If we get here, then we have a trailing , in a multi target node.
|
11370
|
-
// We'll
|
11397
|
+
// We'll add an implicit rest node to represent this.
|
11371
11398
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
11372
11399
|
pm_multi_target_node_targets_append(parser, result, rest);
|
11373
11400
|
break;
|
@@ -11457,8 +11484,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11457
11484
|
|
11458
11485
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11459
11486
|
if (context_terminator(context, &parser->current)) break;
|
11460
|
-
} else {
|
11461
|
-
|
11487
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11488
|
+
// This is an inlined version of accept1 because the error that we
|
11489
|
+
// want to add has varargs. If this happens again, we should
|
11490
|
+
// probably extract a helper function.
|
11491
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11492
|
+
parser->previous.start = parser->previous.end;
|
11493
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11462
11494
|
}
|
11463
11495
|
}
|
11464
11496
|
|
@@ -13852,7 +13884,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13852
13884
|
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13853
13885
|
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13854
13886
|
} else {
|
13855
|
-
|
13887
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13856
13888
|
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13857
13889
|
}
|
13858
13890
|
}
|
@@ -14161,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14161
14193
|
parser_lex(parser);
|
14162
14194
|
|
14163
14195
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14164
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14196
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14165
14197
|
// If we get here, then we have an end immediately after a
|
14166
14198
|
// start. In that case we'll create an empty content token and
|
14167
14199
|
// return an uninterpolated string.
|
@@ -14174,7 +14206,6 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14174
14206
|
// If we get here, then we have an end of a label immediately
|
14175
14207
|
// after a start. In that case we'll create an empty symbol
|
14176
14208
|
// node.
|
14177
|
-
pm_token_t opening = not_provided(parser);
|
14178
14209
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
14179
14210
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
14180
14211
|
|
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14218
14249
|
parser_lex(parser);
|
14219
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
14220
14251
|
|
14221
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14222
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
14223
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
14224
14255
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14225
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
14226
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
14227
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14228
14261
|
} else {
|
14229
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
14230
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14231
14266
|
}
|
14232
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14241
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14242
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14243
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
14244
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14245
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
14246
14281
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14247
14282
|
} else {
|
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14332
14367
|
return current;
|
14333
14368
|
}
|
14334
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
14335
14393
|
/**
|
14336
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
14337
14395
|
*/
|
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14516
14574
|
// If we didn't find a terminator and we didn't find a right
|
14517
14575
|
// parenthesis, then this is a syntax error.
|
14518
14576
|
if (!terminator_found) {
|
14519
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14520
14578
|
}
|
14521
14579
|
|
14522
14580
|
// Parse each statement within the parentheses.
|
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14545
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14546
14604
|
break;
|
14547
14605
|
} else {
|
14548
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14549
14607
|
}
|
14550
14608
|
}
|
14551
14609
|
|
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15626
15684
|
* methods to override the unary operators, we should ignore
|
15627
15685
|
* the @ in the same way we do for symbols.
|
15628
15686
|
*/
|
15629
|
-
name.
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15630
15688
|
|
15631
15689
|
return (pm_node_t *) pm_def_node_create(
|
15632
15690
|
parser,
|
15691
|
+
name_id,
|
15633
15692
|
&name,
|
15634
15693
|
receiver,
|
15635
15694
|
params,
|
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16458
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
16459
16518
|
// still lex past it though and create a missing node place.
|
16460
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16461
|
-
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
16462
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16463
16522
|
}
|
16464
16523
|
|
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16481
16540
|
parser_lex(parser);
|
16482
16541
|
|
16483
16542
|
pm_token_t operator = parser->previous;
|
16484
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
16485
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
16486
16545
|
|
16487
16546
|
pm_conditional_predicate(receiver);
|
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16491
16550
|
parser_lex(parser);
|
16492
16551
|
|
16493
16552
|
pm_token_t operator = parser->previous;
|
16494
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16495
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
16496
16555
|
|
16497
16556
|
return (pm_node_t *) node;
|
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16500
16559
|
parser_lex(parser);
|
16501
16560
|
|
16502
16561
|
pm_token_t operator = parser->previous;
|
16503
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16504
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
16505
16564
|
|
16506
16565
|
return (pm_node_t *) node;
|
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16509
16568
|
parser_lex(parser);
|
16510
16569
|
|
16511
16570
|
pm_token_t operator = parser->previous;
|
16512
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16513
16572
|
|
16514
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
16515
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16625
16684
|
parser_lex(parser);
|
16626
16685
|
|
16627
16686
|
pm_token_t operator = parser->previous;
|
16628
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16629
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16630
16689
|
|
16631
16690
|
return (pm_node_t *) node;
|
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16648
16707
|
// here because it will provide more context in addition to the
|
16649
16708
|
// recoverable error that we will also add.
|
16650
16709
|
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16651
|
-
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16652
16711
|
}
|
16653
16712
|
|
16654
16713
|
// If we get here, then we are assuming this token is closing a
|
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16661
16720
|
// have an unexpected token.
|
16662
16721
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16663
16722
|
} else {
|
16664
|
-
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16665
16724
|
}
|
16666
16725
|
|
16667
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
@@ -16710,7 +16769,18 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
16710
16769
|
if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
|
16711
16770
|
pm_token_t rescue = parser->current;
|
16712
16771
|
parser_lex(parser);
|
16713
|
-
|
16772
|
+
|
16773
|
+
bool accepts_command_call_inner = false;
|
16774
|
+
|
16775
|
+
// RHS can accept command call iff the value is a call with arguments but without paranthesis.
|
16776
|
+
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
|
16777
|
+
pm_call_node_t *call_node = (pm_call_node_t *)value;
|
16778
|
+
if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
|
16779
|
+
accepts_command_call_inner = true;
|
16780
|
+
}
|
16781
|
+
}
|
16782
|
+
|
16783
|
+
pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, PM_ERR_RESCUE_MODIFIER_VALUE);
|
16714
16784
|
|
16715
16785
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
16716
16786
|
}
|
@@ -16895,7 +16965,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16895
16965
|
switch (PM_NODE_TYPE(node)) {
|
16896
16966
|
case PM_BACK_REFERENCE_READ_NODE:
|
16897
16967
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16898
|
-
|
16968
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16899
16969
|
/* fallthrough */
|
16900
16970
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16901
16971
|
parser_lex(parser);
|
@@ -17006,7 +17076,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17006
17076
|
switch (PM_NODE_TYPE(node)) {
|
17007
17077
|
case PM_BACK_REFERENCE_READ_NODE:
|
17008
17078
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17009
|
-
|
17079
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17010
17080
|
/* fallthrough */
|
17011
17081
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17012
17082
|
parser_lex(parser);
|
@@ -17127,7 +17197,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17127
17197
|
switch (PM_NODE_TYPE(node)) {
|
17128
17198
|
case PM_BACK_REFERENCE_READ_NODE:
|
17129
17199
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17130
|
-
|
17200
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17131
17201
|
/* fallthrough */
|
17132
17202
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17133
17203
|
parser_lex(parser);
|
@@ -17791,6 +17861,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17791
17861
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17792
17862
|
.next_start = NULL,
|
17793
17863
|
.heredoc_end = NULL,
|
17864
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17794
17865
|
.comment_list = { 0 },
|
17795
17866
|
.magic_comment_list = { 0 },
|
17796
17867
|
.warning_list = { 0 },
|
@@ -18055,7 +18126,7 @@ typedef struct {
|
|
18055
18126
|
pm_diagnostic_t *error;
|
18056
18127
|
|
18057
18128
|
/** The start line of the diagnostic message. */
|
18058
|
-
|
18129
|
+
int32_t line;
|
18059
18130
|
|
18060
18131
|
/** The column start of the diagnostic message. */
|
18061
18132
|
uint32_t column_start;
|
@@ -18087,12 +18158,13 @@ typedef struct {
|
|
18087
18158
|
#define PM_COLOR_RESET "\033[0m"
|
18088
18159
|
|
18089
18160
|
static inline pm_error_t *
|
18090
|
-
pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18161
|
+
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18091
18162
|
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18163
|
+
int32_t start_line = parser->start_line;
|
18092
18164
|
|
18093
18165
|
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18094
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
|
18095
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
|
18166
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
18167
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
18096
18168
|
|
18097
18169
|
// We're going to insert this error into the array in sorted order. We
|
18098
18170
|
// do this by finding the first error that has a line number greater
|
@@ -18103,8 +18175,8 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18103
18175
|
(index < error_list->size) &&
|
18104
18176
|
(errors[index].error != NULL) &&
|
18105
18177
|
(
|
18106
|
-
(errors[index].line <
|
18107
|
-
(errors[index].line ==
|
18178
|
+
(errors[index].line < start.line) ||
|
18179
|
+
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
18108
18180
|
)
|
18109
18181
|
) index++;
|
18110
18182
|
|
@@ -18117,18 +18189,18 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18117
18189
|
// Finally, we'll insert the error into the array.
|
18118
18190
|
uint32_t column_end;
|
18119
18191
|
if (start.line == end.line) {
|
18120
|
-
column_end =
|
18192
|
+
column_end = end.column;
|
18121
18193
|
} else {
|
18122
|
-
column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line -
|
18194
|
+
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
18123
18195
|
}
|
18124
18196
|
|
18125
18197
|
// Ensure we have at least one column of error.
|
18126
|
-
if (
|
18198
|
+
if (start.column == column_end) column_end++;
|
18127
18199
|
|
18128
18200
|
errors[index] = (pm_error_t) {
|
18129
18201
|
.error = error,
|
18130
|
-
.line =
|
18131
|
-
.column_start =
|
18202
|
+
.line = start.line,
|
18203
|
+
.column_start = start.column,
|
18132
18204
|
.column_end = column_end
|
18133
18205
|
};
|
18134
18206
|
}
|
@@ -18137,17 +18209,19 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18137
18209
|
}
|
18138
18210
|
|
18139
18211
|
static inline void
|
18140
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix,
|
18141
|
-
|
18212
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
18213
|
+
size_t index = (size_t) (line - parser->start_line);
|
18214
|
+
|
18215
|
+
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
18142
18216
|
const uint8_t *end;
|
18143
18217
|
|
18144
|
-
if (
|
18218
|
+
if (index >= newline_list->size - 1) {
|
18145
18219
|
end = parser->end;
|
18146
18220
|
} else {
|
18147
|
-
end = &parser->start[newline_list->offsets[
|
18221
|
+
end = &parser->start[newline_list->offsets[index + 1]];
|
18148
18222
|
}
|
18149
18223
|
|
18150
|
-
pm_buffer_append_format(buffer, number_prefix,
|
18224
|
+
pm_buffer_append_format(buffer, number_prefix, line);
|
18151
18225
|
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18152
18226
|
|
18153
18227
|
if (end == parser->end && end[-1] != '\n') {
|
@@ -18165,25 +18239,26 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18165
18239
|
|
18166
18240
|
// First, we're going to sort all of the errors by line number using an
|
18167
18241
|
// insertion sort into a newly allocated array.
|
18242
|
+
const int32_t start_line = parser->start_line;
|
18168
18243
|
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18169
|
-
pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
|
18244
|
+
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
18170
18245
|
|
18171
18246
|
// Now we're going to determine how we're going to format line numbers and
|
18172
18247
|
// blank lines based on the maximum number of digits in the line numbers
|
18173
18248
|
// that are going to be displayed.
|
18174
18249
|
pm_error_format_t error_format;
|
18175
|
-
|
18250
|
+
int32_t max_line_number = errors[error_list->size - 1].line - start_line;
|
18176
18251
|
|
18177
18252
|
if (max_line_number < 10) {
|
18178
18253
|
if (colorize) {
|
18179
18254
|
error_format = (pm_error_format_t) {
|
18180
|
-
.number_prefix = PM_COLOR_GRAY "%1"
|
18255
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
18181
18256
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18182
18257
|
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18183
18258
|
};
|
18184
18259
|
} else {
|
18185
18260
|
error_format = (pm_error_format_t) {
|
18186
|
-
.number_prefix = "%1"
|
18261
|
+
.number_prefix = "%1" PRIi32 " | ",
|
18187
18262
|
.blank_prefix = " | ",
|
18188
18263
|
.divider = " ~~~~~\n"
|
18189
18264
|
};
|
@@ -18191,13 +18266,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18191
18266
|
} else if (max_line_number < 100) {
|
18192
18267
|
if (colorize) {
|
18193
18268
|
error_format = (pm_error_format_t) {
|
18194
|
-
.number_prefix = PM_COLOR_GRAY "%2"
|
18269
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
18195
18270
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18196
18271
|
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18197
18272
|
};
|
18198
18273
|
} else {
|
18199
18274
|
error_format = (pm_error_format_t) {
|
18200
|
-
.number_prefix = "%2"
|
18275
|
+
.number_prefix = "%2" PRIi32 " | ",
|
18201
18276
|
.blank_prefix = " | ",
|
18202
18277
|
.divider = " ~~~~~~\n"
|
18203
18278
|
};
|
@@ -18205,13 +18280,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18205
18280
|
} else if (max_line_number < 1000) {
|
18206
18281
|
if (colorize) {
|
18207
18282
|
error_format = (pm_error_format_t) {
|
18208
|
-
.number_prefix = PM_COLOR_GRAY "%3"
|
18283
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
18209
18284
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18210
18285
|
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18211
18286
|
};
|
18212
18287
|
} else {
|
18213
18288
|
error_format = (pm_error_format_t) {
|
18214
|
-
.number_prefix = "%3"
|
18289
|
+
.number_prefix = "%3" PRIi32 " | ",
|
18215
18290
|
.blank_prefix = " | ",
|
18216
18291
|
.divider = " ~~~~~~~\n"
|
18217
18292
|
};
|
@@ -18219,13 +18294,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18219
18294
|
} else if (max_line_number < 10000) {
|
18220
18295
|
if (colorize) {
|
18221
18296
|
error_format = (pm_error_format_t) {
|
18222
|
-
.number_prefix = PM_COLOR_GRAY "%4"
|
18297
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
18223
18298
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18224
18299
|
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18225
18300
|
};
|
18226
18301
|
} else {
|
18227
18302
|
error_format = (pm_error_format_t) {
|
18228
|
-
.number_prefix = "%4"
|
18303
|
+
.number_prefix = "%4" PRIi32 " | ",
|
18229
18304
|
.blank_prefix = " | ",
|
18230
18305
|
.divider = " ~~~~~~~~\n"
|
18231
18306
|
};
|
@@ -18233,13 +18308,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18233
18308
|
} else {
|
18234
18309
|
if (colorize) {
|
18235
18310
|
error_format = (pm_error_format_t) {
|
18236
|
-
.number_prefix = PM_COLOR_GRAY "%5"
|
18311
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
18237
18312
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18238
18313
|
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18239
18314
|
};
|
18240
18315
|
} else {
|
18241
18316
|
error_format = (pm_error_format_t) {
|
18242
|
-
.number_prefix = "%5"
|
18317
|
+
.number_prefix = "%5" PRIi32 " | ",
|
18243
18318
|
.blank_prefix = " | ",
|
18244
18319
|
.divider = " ~~~~~~~~\n"
|
18245
18320
|
};
|
@@ -18254,7 +18329,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18254
18329
|
// the source before the error to give some context. We'll be careful not to
|
18255
18330
|
// display the same line twice in case the errors are close enough in the
|
18256
18331
|
// source.
|
18257
|
-
|
18332
|
+
int32_t last_line = 0;
|
18258
18333
|
const pm_encoding_t *encoding = parser->encoding;
|
18259
18334
|
|
18260
18335
|
for (size_t index = 0; index < error_list->size; index++) {
|
@@ -18300,7 +18375,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18300
18375
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18301
18376
|
|
18302
18377
|
size_t column = 0;
|
18303
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line -
|
18378
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
18304
18379
|
|
18305
18380
|
while (column < error->column_end) {
|
18306
18381
|
if (column < error->column_start) {
|
@@ -18324,7 +18399,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18324
18399
|
// Here we determine how many lines of padding to display after the
|
18325
18400
|
// error, depending on where the next error is in source.
|
18326
18401
|
last_line = error->line;
|
18327
|
-
|
18402
|
+
int32_t next_line = (index == error_list->size - 1) ? ((int32_t) newline_list->size) : errors[index + 1].line;
|
18328
18403
|
|
18329
18404
|
if (next_line - last_line > 1) {
|
18330
18405
|
pm_buffer_append_string(buffer, " ", 2);
|