prism 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +1982 -1538
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_newline_list.h +4 -3
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +103 -77
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node.rb +3624 -2114
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +56 -19
- data/lib/prism/serialize.rb +605 -303
- data/lib/prism/translation/parser/compiler.rb +1 -1
- data/lib/prism/translation/parser/rubocop.rb +11 -3
- data/lib/prism/translation/parser.rb +25 -12
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +6 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prettyprint.c +3 -3
- data/src/prism.c +172 -97
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_newline_list.c +6 -3
- data/src/util/pm_strpbrk.c +122 -14
- metadata +8 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/src/prism.c
CHANGED
@@ -51,6 +51,7 @@ debug_context(pm_context_t context) {
|
|
51
51
|
case PM_CONTEXT_IF: return "IF";
|
52
52
|
case PM_CONTEXT_MAIN: return "MAIN";
|
53
53
|
case PM_CONTEXT_MODULE: return "MODULE";
|
54
|
+
case PM_CONTEXT_NONE: return "NONE";
|
54
55
|
case PM_CONTEXT_PARENS: return "PARENS";
|
55
56
|
case PM_CONTEXT_POSTEXE: return "POSTEXE";
|
56
57
|
case PM_CONTEXT_PREDICATE: return "PREDICATE";
|
@@ -492,7 +493,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
492
493
|
/**
|
493
494
|
* Append an error to the list of errors on the parser using a format string.
|
494
495
|
*/
|
495
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
496
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
497
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
496
498
|
|
497
499
|
/**
|
498
500
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -507,7 +509,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
507
509
|
* Append an error to the list of errors on the parser using the given location
|
508
510
|
* using a format string.
|
509
511
|
*/
|
510
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
512
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
513
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
511
514
|
|
512
515
|
/**
|
513
516
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -522,7 +525,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
522
525
|
* Append an error to the list of errors on the parser using the location of the
|
523
526
|
* given node and a format string.
|
524
527
|
*/
|
525
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
528
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
529
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
530
|
+
|
531
|
+
/**
|
532
|
+
* Append an error to the list of errors on the parser using the location of the
|
533
|
+
* given node and a format string, and add on the content of the node.
|
534
|
+
*/
|
535
|
+
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
|
536
|
+
PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
|
526
537
|
|
527
538
|
/**
|
528
539
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -546,7 +557,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
546
557
|
* Append an error to the list of errors on the parser using the location of the
|
547
558
|
* given token and a format string.
|
548
559
|
*/
|
549
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
560
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
561
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
562
|
+
|
563
|
+
/**
|
564
|
+
* Append an error to the list of errors on the parser using the location of the
|
565
|
+
* given token and a format string, and add on the content of the token.
|
566
|
+
*/
|
567
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
|
568
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
|
550
569
|
|
551
570
|
/**
|
552
571
|
* Append a warning to the list of warnings on the parser.
|
@@ -2890,7 +2909,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
2890
2909
|
static pm_def_node_t *
|
2891
2910
|
pm_def_node_create(
|
2892
2911
|
pm_parser_t *parser,
|
2893
|
-
|
2912
|
+
pm_constant_id_t name,
|
2913
|
+
const pm_token_t *name_loc,
|
2894
2914
|
pm_node_t *receiver,
|
2895
2915
|
pm_parameters_node_t *parameters,
|
2896
2916
|
pm_node_t *body,
|
@@ -2920,8 +2940,8 @@ pm_def_node_create(
|
|
2920
2940
|
.type = PM_DEF_NODE,
|
2921
2941
|
.location = { .start = def_keyword->start, .end = end },
|
2922
2942
|
},
|
2923
|
-
.name =
|
2924
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2943
|
+
.name = name,
|
2944
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2925
2945
|
.receiver = receiver,
|
2926
2946
|
.parameters = parameters,
|
2927
2947
|
.body = body,
|
@@ -4642,13 +4662,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4642
4662
|
*/
|
4643
4663
|
static void
|
4644
4664
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4645
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4665
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4646
4666
|
if (node->rest == NULL) {
|
4647
4667
|
node->rest = target;
|
4648
4668
|
} else {
|
4649
4669
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4650
4670
|
pm_node_list_append(&node->rights, target);
|
4651
4671
|
}
|
4672
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4673
|
+
if (node->rest == NULL) {
|
4674
|
+
node->rest = target;
|
4675
|
+
} else {
|
4676
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4677
|
+
pm_node_list_append(&node->rights, target);
|
4678
|
+
}
|
4652
4679
|
} else if (node->rest == NULL) {
|
4653
4680
|
pm_node_list_append(&node->lefts, target);
|
4654
4681
|
} else {
|
@@ -7172,7 +7199,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
7172
7199
|
static pm_token_type_t
|
7173
7200
|
lex_global_variable(pm_parser_t *parser) {
|
7174
7201
|
if (parser->current.end >= parser->end) {
|
7175
|
-
|
7202
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7176
7203
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
7177
7204
|
}
|
7178
7205
|
|
@@ -7213,7 +7240,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7213
7240
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
7214
7241
|
|
7215
7242
|
// $0 isn't allowed to be followed by anything.
|
7216
|
-
|
7243
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7217
7244
|
}
|
7218
7245
|
|
7219
7246
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7244,7 +7271,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7244
7271
|
} else {
|
7245
7272
|
// If we get here, then we have a $ followed by something that isn't
|
7246
7273
|
// recognized as a global variable.
|
7247
|
-
|
7274
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7248
7275
|
}
|
7249
7276
|
|
7250
7277
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -8148,10 +8175,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
8148
8175
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
8149
8176
|
parser->current.end += width;
|
8150
8177
|
}
|
8151
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
8152
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
8153
8178
|
} else {
|
8154
|
-
|
8179
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8180
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8181
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8155
8182
|
}
|
8156
8183
|
|
8157
8184
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -9711,7 +9738,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9711
9738
|
// and then find the first one.
|
9712
9739
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9713
9740
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9714
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9741
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9715
9742
|
|
9716
9743
|
// If we haven't found an escape yet, then this buffer will be
|
9717
9744
|
// unallocated since we can refer directly to the source string.
|
@@ -9720,7 +9747,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9720
9747
|
while (breakpoint != NULL) {
|
9721
9748
|
// If we hit a null byte, skip directly past it.
|
9722
9749
|
if (*breakpoint == '\0') {
|
9723
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9750
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9724
9751
|
continue;
|
9725
9752
|
}
|
9726
9753
|
|
@@ -9739,7 +9766,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9739
9766
|
// we need to continue on past it.
|
9740
9767
|
if (lex_mode->as.list.nesting > 0) {
|
9741
9768
|
parser->current.end = breakpoint + 1;
|
9742
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9769
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9743
9770
|
lex_mode->as.list.nesting--;
|
9744
9771
|
continue;
|
9745
9772
|
}
|
@@ -9824,7 +9851,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9824
9851
|
}
|
9825
9852
|
|
9826
9853
|
token_buffer.cursor = parser->current.end;
|
9827
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9854
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9828
9855
|
continue;
|
9829
9856
|
}
|
9830
9857
|
|
@@ -9837,7 +9864,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9837
9864
|
// that looked like an interpolated class or instance variable
|
9838
9865
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9839
9866
|
// to the next breakpoint.
|
9840
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9867
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9841
9868
|
continue;
|
9842
9869
|
}
|
9843
9870
|
|
@@ -9852,7 +9879,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9852
9879
|
// and find the next breakpoint.
|
9853
9880
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9854
9881
|
parser->current.end = breakpoint + 1;
|
9855
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9882
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9856
9883
|
lex_mode->as.list.nesting++;
|
9857
9884
|
continue;
|
9858
9885
|
}
|
@@ -9891,14 +9918,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9891
9918
|
// regular expression. We'll use strpbrk to find the first of these
|
9892
9919
|
// characters.
|
9893
9920
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9894
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9921
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9895
9922
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9896
9923
|
|
9897
9924
|
while (breakpoint != NULL) {
|
9898
9925
|
// If we hit a null byte, skip directly past it.
|
9899
9926
|
if (*breakpoint == '\0') {
|
9900
9927
|
parser->current.end = breakpoint + 1;
|
9901
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9928
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9902
9929
|
continue;
|
9903
9930
|
}
|
9904
9931
|
|
@@ -9920,7 +9947,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9920
9947
|
// If the terminator is not a newline, then we can set
|
9921
9948
|
// the next breakpoint and continue.
|
9922
9949
|
parser->current.end = breakpoint + 1;
|
9923
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9950
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9924
9951
|
continue;
|
9925
9952
|
}
|
9926
9953
|
}
|
@@ -9930,7 +9957,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9930
9957
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9931
9958
|
if (lex_mode->as.regexp.nesting > 0) {
|
9932
9959
|
parser->current.end = breakpoint + 1;
|
9933
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9960
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9934
9961
|
lex_mode->as.regexp.nesting--;
|
9935
9962
|
continue;
|
9936
9963
|
}
|
@@ -10029,7 +10056,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10029
10056
|
}
|
10030
10057
|
|
10031
10058
|
token_buffer.cursor = parser->current.end;
|
10032
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10059
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10033
10060
|
continue;
|
10034
10061
|
}
|
10035
10062
|
|
@@ -10042,7 +10069,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10042
10069
|
// something that looked like an interpolated class or
|
10043
10070
|
// instance variable like "#@" but wasn't actually. In
|
10044
10071
|
// this case we'll just skip to the next breakpoint.
|
10045
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10072
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10046
10073
|
continue;
|
10047
10074
|
}
|
10048
10075
|
|
@@ -10057,7 +10084,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10057
10084
|
// and find the next breakpoint.
|
10058
10085
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
10059
10086
|
parser->current.end = breakpoint + 1;
|
10060
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10087
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10061
10088
|
lex_mode->as.regexp.nesting++;
|
10062
10089
|
continue;
|
10063
10090
|
}
|
@@ -10093,7 +10120,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10093
10120
|
// string. We'll use strpbrk to find the first of these characters.
|
10094
10121
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10095
10122
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
10096
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10123
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10097
10124
|
|
10098
10125
|
// If we haven't found an escape yet, then this buffer will be
|
10099
10126
|
// unallocated since we can refer directly to the source string.
|
@@ -10105,7 +10132,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10132
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
10106
10133
|
lex_mode->as.string.nesting++;
|
10107
10134
|
parser->current.end = breakpoint + 1;
|
10108
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10135
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10109
10136
|
continue;
|
10110
10137
|
}
|
10111
10138
|
|
@@ -10117,7 +10144,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10117
10144
|
// to continue on past it.
|
10118
10145
|
if (lex_mode->as.string.nesting > 0) {
|
10119
10146
|
parser->current.end = breakpoint + 1;
|
10120
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10147
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10121
10148
|
lex_mode->as.string.nesting--;
|
10122
10149
|
continue;
|
10123
10150
|
}
|
@@ -10159,7 +10186,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10159
10186
|
if (parser->heredoc_end == NULL) {
|
10160
10187
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
10161
10188
|
parser->current.end = breakpoint + 1;
|
10162
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10189
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10163
10190
|
continue;
|
10164
10191
|
} else {
|
10165
10192
|
parser->current.end = breakpoint + 1;
|
@@ -10173,7 +10200,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10173
10200
|
case '\0':
|
10174
10201
|
// Skip directly past the null character.
|
10175
10202
|
parser->current.end = breakpoint + 1;
|
10176
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10203
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10177
10204
|
break;
|
10178
10205
|
case '\\': {
|
10179
10206
|
// Here we hit escapes.
|
@@ -10242,7 +10269,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10242
10269
|
}
|
10243
10270
|
|
10244
10271
|
token_buffer.cursor = parser->current.end;
|
10245
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10272
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10246
10273
|
break;
|
10247
10274
|
}
|
10248
10275
|
case '#': {
|
@@ -10253,7 +10280,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10253
10280
|
// looked like an interpolated class or instance variable like "#@"
|
10254
10281
|
// but wasn't actually. In this case we'll just skip to the next
|
10255
10282
|
// breakpoint.
|
10256
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10283
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10257
10284
|
break;
|
10258
10285
|
}
|
10259
10286
|
|
@@ -10381,7 +10408,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10381
10408
|
breakpoints[2] = '\0';
|
10382
10409
|
}
|
10383
10410
|
|
10384
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10411
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10385
10412
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
10386
10413
|
bool was_escaped_newline = false;
|
10387
10414
|
|
@@ -10390,7 +10417,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10390
10417
|
case '\0':
|
10391
10418
|
// Skip directly past the null character.
|
10392
10419
|
parser->current.end = breakpoint + 1;
|
10393
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10420
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10394
10421
|
break;
|
10395
10422
|
case '\n': {
|
10396
10423
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10465,7 +10492,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10465
10492
|
// Otherwise we hit a newline and it wasn't followed by
|
10466
10493
|
// a terminator, so we can continue parsing.
|
10467
10494
|
parser->current.end = breakpoint + 1;
|
10468
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10495
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10469
10496
|
break;
|
10470
10497
|
}
|
10471
10498
|
case '\\': {
|
@@ -10529,7 +10556,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10529
10556
|
}
|
10530
10557
|
|
10531
10558
|
token_buffer.cursor = parser->current.end;
|
10532
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10559
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10533
10560
|
break;
|
10534
10561
|
}
|
10535
10562
|
case '#': {
|
@@ -10541,7 +10568,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10541
10568
|
// or instance variable like "#@" but wasn't
|
10542
10569
|
// actually. In this case we'll just skip to the
|
10543
10570
|
// next breakpoint.
|
10544
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10571
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10545
10572
|
break;
|
10546
10573
|
}
|
10547
10574
|
|
@@ -11054,7 +11081,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
11054
11081
|
return target;
|
11055
11082
|
case PM_BACK_REFERENCE_READ_NODE:
|
11056
11083
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11057
|
-
|
11084
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11058
11085
|
return target;
|
11059
11086
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
11060
11087
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -11192,7 +11219,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
11192
11219
|
}
|
11193
11220
|
case PM_BACK_REFERENCE_READ_NODE:
|
11194
11221
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11195
|
-
|
11222
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11196
11223
|
/* fallthrough */
|
11197
11224
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11198
11225
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -11367,7 +11394,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
11367
11394
|
pm_multi_target_node_targets_append(parser, result, target);
|
11368
11395
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
11369
11396
|
// If we get here, then we have a trailing , in a multi target node.
|
11370
|
-
// We'll
|
11397
|
+
// We'll add an implicit rest node to represent this.
|
11371
11398
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
11372
11399
|
pm_multi_target_node_targets_append(parser, result, rest);
|
11373
11400
|
break;
|
@@ -11457,8 +11484,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11457
11484
|
|
11458
11485
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11459
11486
|
if (context_terminator(context, &parser->current)) break;
|
11460
|
-
} else {
|
11461
|
-
|
11487
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11488
|
+
// This is an inlined version of accept1 because the error that we
|
11489
|
+
// want to add has varargs. If this happens again, we should
|
11490
|
+
// probably extract a helper function.
|
11491
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11492
|
+
parser->previous.start = parser->previous.end;
|
11493
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11462
11494
|
}
|
11463
11495
|
}
|
11464
11496
|
|
@@ -13852,7 +13884,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13852
13884
|
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13853
13885
|
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13854
13886
|
} else {
|
13855
|
-
|
13887
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13856
13888
|
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13857
13889
|
}
|
13858
13890
|
}
|
@@ -14161,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14161
14193
|
parser_lex(parser);
|
14162
14194
|
|
14163
14195
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14164
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14196
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14165
14197
|
// If we get here, then we have an end immediately after a
|
14166
14198
|
// start. In that case we'll create an empty content token and
|
14167
14199
|
// return an uninterpolated string.
|
@@ -14174,7 +14206,6 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14174
14206
|
// If we get here, then we have an end of a label immediately
|
14175
14207
|
// after a start. In that case we'll create an empty symbol
|
14176
14208
|
// node.
|
14177
|
-
pm_token_t opening = not_provided(parser);
|
14178
14209
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
14179
14210
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
14180
14211
|
|
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14218
14249
|
parser_lex(parser);
|
14219
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
14220
14251
|
|
14221
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14222
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
14223
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
14224
14255
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14225
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
14226
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
14227
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14228
14261
|
} else {
|
14229
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
14230
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14231
14266
|
}
|
14232
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14241
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14242
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14243
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
14244
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14245
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
14246
14281
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14247
14282
|
} else {
|
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14332
14367
|
return current;
|
14333
14368
|
}
|
14334
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
14335
14393
|
/**
|
14336
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
14337
14395
|
*/
|
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14516
14574
|
// If we didn't find a terminator and we didn't find a right
|
14517
14575
|
// parenthesis, then this is a syntax error.
|
14518
14576
|
if (!terminator_found) {
|
14519
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14520
14578
|
}
|
14521
14579
|
|
14522
14580
|
// Parse each statement within the parentheses.
|
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14545
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14546
14604
|
break;
|
14547
14605
|
} else {
|
14548
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14549
14607
|
}
|
14550
14608
|
}
|
14551
14609
|
|
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15626
15684
|
* methods to override the unary operators, we should ignore
|
15627
15685
|
* the @ in the same way we do for symbols.
|
15628
15686
|
*/
|
15629
|
-
name.
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15630
15688
|
|
15631
15689
|
return (pm_node_t *) pm_def_node_create(
|
15632
15690
|
parser,
|
15691
|
+
name_id,
|
15633
15692
|
&name,
|
15634
15693
|
receiver,
|
15635
15694
|
params,
|
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16458
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
16459
16518
|
// still lex past it though and create a missing node place.
|
16460
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16461
|
-
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
16462
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16463
16522
|
}
|
16464
16523
|
|
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16481
16540
|
parser_lex(parser);
|
16482
16541
|
|
16483
16542
|
pm_token_t operator = parser->previous;
|
16484
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
16485
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
16486
16545
|
|
16487
16546
|
pm_conditional_predicate(receiver);
|
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16491
16550
|
parser_lex(parser);
|
16492
16551
|
|
16493
16552
|
pm_token_t operator = parser->previous;
|
16494
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16495
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
16496
16555
|
|
16497
16556
|
return (pm_node_t *) node;
|
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16500
16559
|
parser_lex(parser);
|
16501
16560
|
|
16502
16561
|
pm_token_t operator = parser->previous;
|
16503
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16504
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
16505
16564
|
|
16506
16565
|
return (pm_node_t *) node;
|
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16509
16568
|
parser_lex(parser);
|
16510
16569
|
|
16511
16570
|
pm_token_t operator = parser->previous;
|
16512
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16513
16572
|
|
16514
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
16515
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16625
16684
|
parser_lex(parser);
|
16626
16685
|
|
16627
16686
|
pm_token_t operator = parser->previous;
|
16628
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16629
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16630
16689
|
|
16631
16690
|
return (pm_node_t *) node;
|
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16648
16707
|
// here because it will provide more context in addition to the
|
16649
16708
|
// recoverable error that we will also add.
|
16650
16709
|
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16651
|
-
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16652
16711
|
}
|
16653
16712
|
|
16654
16713
|
// If we get here, then we are assuming this token is closing a
|
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16661
16720
|
// have an unexpected token.
|
16662
16721
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16663
16722
|
} else {
|
16664
|
-
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16665
16724
|
}
|
16666
16725
|
|
16667
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
@@ -16710,7 +16769,18 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
16710
16769
|
if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
|
16711
16770
|
pm_token_t rescue = parser->current;
|
16712
16771
|
parser_lex(parser);
|
16713
|
-
|
16772
|
+
|
16773
|
+
bool accepts_command_call_inner = false;
|
16774
|
+
|
16775
|
+
// RHS can accept command call iff the value is a call with arguments but without paranthesis.
|
16776
|
+
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
|
16777
|
+
pm_call_node_t *call_node = (pm_call_node_t *)value;
|
16778
|
+
if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
|
16779
|
+
accepts_command_call_inner = true;
|
16780
|
+
}
|
16781
|
+
}
|
16782
|
+
|
16783
|
+
pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, PM_ERR_RESCUE_MODIFIER_VALUE);
|
16714
16784
|
|
16715
16785
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
16716
16786
|
}
|
@@ -16895,7 +16965,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16895
16965
|
switch (PM_NODE_TYPE(node)) {
|
16896
16966
|
case PM_BACK_REFERENCE_READ_NODE:
|
16897
16967
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16898
|
-
|
16968
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16899
16969
|
/* fallthrough */
|
16900
16970
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16901
16971
|
parser_lex(parser);
|
@@ -17006,7 +17076,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17006
17076
|
switch (PM_NODE_TYPE(node)) {
|
17007
17077
|
case PM_BACK_REFERENCE_READ_NODE:
|
17008
17078
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17009
|
-
|
17079
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17010
17080
|
/* fallthrough */
|
17011
17081
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17012
17082
|
parser_lex(parser);
|
@@ -17127,7 +17197,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17127
17197
|
switch (PM_NODE_TYPE(node)) {
|
17128
17198
|
case PM_BACK_REFERENCE_READ_NODE:
|
17129
17199
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17130
|
-
|
17200
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17131
17201
|
/* fallthrough */
|
17132
17202
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17133
17203
|
parser_lex(parser);
|
@@ -17791,6 +17861,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17791
17861
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17792
17862
|
.next_start = NULL,
|
17793
17863
|
.heredoc_end = NULL,
|
17864
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17794
17865
|
.comment_list = { 0 },
|
17795
17866
|
.magic_comment_list = { 0 },
|
17796
17867
|
.warning_list = { 0 },
|
@@ -18055,7 +18126,7 @@ typedef struct {
|
|
18055
18126
|
pm_diagnostic_t *error;
|
18056
18127
|
|
18057
18128
|
/** The start line of the diagnostic message. */
|
18058
|
-
|
18129
|
+
int32_t line;
|
18059
18130
|
|
18060
18131
|
/** The column start of the diagnostic message. */
|
18061
18132
|
uint32_t column_start;
|
@@ -18087,12 +18158,13 @@ typedef struct {
|
|
18087
18158
|
#define PM_COLOR_RESET "\033[0m"
|
18088
18159
|
|
18089
18160
|
static inline pm_error_t *
|
18090
|
-
pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18161
|
+
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18091
18162
|
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18163
|
+
int32_t start_line = parser->start_line;
|
18092
18164
|
|
18093
18165
|
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18094
|
-
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
|
18095
|
-
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
|
18166
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
18167
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
18096
18168
|
|
18097
18169
|
// We're going to insert this error into the array in sorted order. We
|
18098
18170
|
// do this by finding the first error that has a line number greater
|
@@ -18103,8 +18175,8 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18103
18175
|
(index < error_list->size) &&
|
18104
18176
|
(errors[index].error != NULL) &&
|
18105
18177
|
(
|
18106
|
-
(errors[index].line <
|
18107
|
-
(errors[index].line ==
|
18178
|
+
(errors[index].line < start.line) ||
|
18179
|
+
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
18108
18180
|
)
|
18109
18181
|
) index++;
|
18110
18182
|
|
@@ -18117,18 +18189,18 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18117
18189
|
// Finally, we'll insert the error into the array.
|
18118
18190
|
uint32_t column_end;
|
18119
18191
|
if (start.line == end.line) {
|
18120
|
-
column_end =
|
18192
|
+
column_end = end.column;
|
18121
18193
|
} else {
|
18122
|
-
column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line -
|
18194
|
+
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
18123
18195
|
}
|
18124
18196
|
|
18125
18197
|
// Ensure we have at least one column of error.
|
18126
|
-
if (
|
18198
|
+
if (start.column == column_end) column_end++;
|
18127
18199
|
|
18128
18200
|
errors[index] = (pm_error_t) {
|
18129
18201
|
.error = error,
|
18130
|
-
.line =
|
18131
|
-
.column_start =
|
18202
|
+
.line = start.line,
|
18203
|
+
.column_start = start.column,
|
18132
18204
|
.column_end = column_end
|
18133
18205
|
};
|
18134
18206
|
}
|
@@ -18137,17 +18209,19 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18137
18209
|
}
|
18138
18210
|
|
18139
18211
|
static inline void
|
18140
|
-
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix,
|
18141
|
-
|
18212
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
18213
|
+
size_t index = (size_t) (line - parser->start_line);
|
18214
|
+
|
18215
|
+
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
18142
18216
|
const uint8_t *end;
|
18143
18217
|
|
18144
|
-
if (
|
18218
|
+
if (index >= newline_list->size - 1) {
|
18145
18219
|
end = parser->end;
|
18146
18220
|
} else {
|
18147
|
-
end = &parser->start[newline_list->offsets[
|
18221
|
+
end = &parser->start[newline_list->offsets[index + 1]];
|
18148
18222
|
}
|
18149
18223
|
|
18150
|
-
pm_buffer_append_format(buffer, number_prefix,
|
18224
|
+
pm_buffer_append_format(buffer, number_prefix, line);
|
18151
18225
|
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18152
18226
|
|
18153
18227
|
if (end == parser->end && end[-1] != '\n') {
|
@@ -18165,25 +18239,26 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18165
18239
|
|
18166
18240
|
// First, we're going to sort all of the errors by line number using an
|
18167
18241
|
// insertion sort into a newly allocated array.
|
18242
|
+
const int32_t start_line = parser->start_line;
|
18168
18243
|
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18169
|
-
pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
|
18244
|
+
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
18170
18245
|
|
18171
18246
|
// Now we're going to determine how we're going to format line numbers and
|
18172
18247
|
// blank lines based on the maximum number of digits in the line numbers
|
18173
18248
|
// that are going to be displayed.
|
18174
18249
|
pm_error_format_t error_format;
|
18175
|
-
|
18250
|
+
int32_t max_line_number = errors[error_list->size - 1].line - start_line;
|
18176
18251
|
|
18177
18252
|
if (max_line_number < 10) {
|
18178
18253
|
if (colorize) {
|
18179
18254
|
error_format = (pm_error_format_t) {
|
18180
|
-
.number_prefix = PM_COLOR_GRAY "%1"
|
18255
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
18181
18256
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18182
18257
|
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18183
18258
|
};
|
18184
18259
|
} else {
|
18185
18260
|
error_format = (pm_error_format_t) {
|
18186
|
-
.number_prefix = "%1"
|
18261
|
+
.number_prefix = "%1" PRIi32 " | ",
|
18187
18262
|
.blank_prefix = " | ",
|
18188
18263
|
.divider = " ~~~~~\n"
|
18189
18264
|
};
|
@@ -18191,13 +18266,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18191
18266
|
} else if (max_line_number < 100) {
|
18192
18267
|
if (colorize) {
|
18193
18268
|
error_format = (pm_error_format_t) {
|
18194
|
-
.number_prefix = PM_COLOR_GRAY "%2"
|
18269
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
18195
18270
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18196
18271
|
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18197
18272
|
};
|
18198
18273
|
} else {
|
18199
18274
|
error_format = (pm_error_format_t) {
|
18200
|
-
.number_prefix = "%2"
|
18275
|
+
.number_prefix = "%2" PRIi32 " | ",
|
18201
18276
|
.blank_prefix = " | ",
|
18202
18277
|
.divider = " ~~~~~~\n"
|
18203
18278
|
};
|
@@ -18205,13 +18280,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18205
18280
|
} else if (max_line_number < 1000) {
|
18206
18281
|
if (colorize) {
|
18207
18282
|
error_format = (pm_error_format_t) {
|
18208
|
-
.number_prefix = PM_COLOR_GRAY "%3"
|
18283
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
18209
18284
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18210
18285
|
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18211
18286
|
};
|
18212
18287
|
} else {
|
18213
18288
|
error_format = (pm_error_format_t) {
|
18214
|
-
.number_prefix = "%3"
|
18289
|
+
.number_prefix = "%3" PRIi32 " | ",
|
18215
18290
|
.blank_prefix = " | ",
|
18216
18291
|
.divider = " ~~~~~~~\n"
|
18217
18292
|
};
|
@@ -18219,13 +18294,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18219
18294
|
} else if (max_line_number < 10000) {
|
18220
18295
|
if (colorize) {
|
18221
18296
|
error_format = (pm_error_format_t) {
|
18222
|
-
.number_prefix = PM_COLOR_GRAY "%4"
|
18297
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
18223
18298
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18224
18299
|
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18225
18300
|
};
|
18226
18301
|
} else {
|
18227
18302
|
error_format = (pm_error_format_t) {
|
18228
|
-
.number_prefix = "%4"
|
18303
|
+
.number_prefix = "%4" PRIi32 " | ",
|
18229
18304
|
.blank_prefix = " | ",
|
18230
18305
|
.divider = " ~~~~~~~~\n"
|
18231
18306
|
};
|
@@ -18233,13 +18308,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18233
18308
|
} else {
|
18234
18309
|
if (colorize) {
|
18235
18310
|
error_format = (pm_error_format_t) {
|
18236
|
-
.number_prefix = PM_COLOR_GRAY "%5"
|
18311
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
18237
18312
|
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18238
18313
|
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18239
18314
|
};
|
18240
18315
|
} else {
|
18241
18316
|
error_format = (pm_error_format_t) {
|
18242
|
-
.number_prefix = "%5"
|
18317
|
+
.number_prefix = "%5" PRIi32 " | ",
|
18243
18318
|
.blank_prefix = " | ",
|
18244
18319
|
.divider = " ~~~~~~~~\n"
|
18245
18320
|
};
|
@@ -18254,7 +18329,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18254
18329
|
// the source before the error to give some context. We'll be careful not to
|
18255
18330
|
// display the same line twice in case the errors are close enough in the
|
18256
18331
|
// source.
|
18257
|
-
|
18332
|
+
int32_t last_line = 0;
|
18258
18333
|
const pm_encoding_t *encoding = parser->encoding;
|
18259
18334
|
|
18260
18335
|
for (size_t index = 0; index < error_list->size; index++) {
|
@@ -18300,7 +18375,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18300
18375
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18301
18376
|
|
18302
18377
|
size_t column = 0;
|
18303
|
-
const uint8_t *start = &parser->start[newline_list->offsets[error->line -
|
18378
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
18304
18379
|
|
18305
18380
|
while (column < error->column_end) {
|
18306
18381
|
if (column < error->column_start) {
|
@@ -18324,7 +18399,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
|
|
18324
18399
|
// Here we determine how many lines of padding to display after the
|
18325
18400
|
// error, depending on where the next error is in source.
|
18326
18401
|
last_line = error->line;
|
18327
|
-
|
18402
|
+
int32_t next_line = (index == error_list->size - 1) ? ((int32_t) newline_list->size) : errors[index + 1].line;
|
18328
18403
|
|
18329
18404
|
if (next_line - last_line > 1) {
|
18330
18405
|
pm_buffer_append_string(buffer, " ", 2);
|