prism 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +1 -1
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +44 -15
- data/lib/prism/serialize.rb +12 -6
- data/lib/prism/translation/parser.rb +10 -9
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +4 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +124 -64
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/src/prism.c
CHANGED
@@ -492,7 +492,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
492
492
|
/**
|
493
493
|
* Append an error to the list of errors on the parser using a format string.
|
494
494
|
*/
|
495
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
495
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
496
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
496
497
|
|
497
498
|
/**
|
498
499
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -507,7 +508,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
507
508
|
* Append an error to the list of errors on the parser using the given location
|
508
509
|
* using a format string.
|
509
510
|
*/
|
510
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
511
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
512
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
511
513
|
|
512
514
|
/**
|
513
515
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -522,7 +524,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
522
524
|
* Append an error to the list of errors on the parser using the location of the
|
523
525
|
* given node and a format string.
|
524
526
|
*/
|
525
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
527
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
528
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
529
|
+
|
530
|
+
/**
|
531
|
+
* Append an error to the list of errors on the parser using the location of the
|
532
|
+
* given node and a format string, and add on the content of the node.
|
533
|
+
*/
|
534
|
+
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
|
535
|
+
PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
|
526
536
|
|
527
537
|
/**
|
528
538
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -546,7 +556,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
546
556
|
* Append an error to the list of errors on the parser using the location of the
|
547
557
|
* given token and a format string.
|
548
558
|
*/
|
549
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
559
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
560
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
561
|
+
|
562
|
+
/**
|
563
|
+
* Append an error to the list of errors on the parser using the location of the
|
564
|
+
* given token and a format string, and add on the content of the token.
|
565
|
+
*/
|
566
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
|
567
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
|
550
568
|
|
551
569
|
/**
|
552
570
|
* Append a warning to the list of warnings on the parser.
|
@@ -2890,7 +2908,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
2890
2908
|
static pm_def_node_t *
|
2891
2909
|
pm_def_node_create(
|
2892
2910
|
pm_parser_t *parser,
|
2893
|
-
|
2911
|
+
pm_constant_id_t name,
|
2912
|
+
const pm_token_t *name_loc,
|
2894
2913
|
pm_node_t *receiver,
|
2895
2914
|
pm_parameters_node_t *parameters,
|
2896
2915
|
pm_node_t *body,
|
@@ -2920,8 +2939,8 @@ pm_def_node_create(
|
|
2920
2939
|
.type = PM_DEF_NODE,
|
2921
2940
|
.location = { .start = def_keyword->start, .end = end },
|
2922
2941
|
},
|
2923
|
-
.name =
|
2924
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2942
|
+
.name = name,
|
2943
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2925
2944
|
.receiver = receiver,
|
2926
2945
|
.parameters = parameters,
|
2927
2946
|
.body = body,
|
@@ -4642,13 +4661,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4642
4661
|
*/
|
4643
4662
|
static void
|
4644
4663
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4645
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4664
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4646
4665
|
if (node->rest == NULL) {
|
4647
4666
|
node->rest = target;
|
4648
4667
|
} else {
|
4649
4668
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4650
4669
|
pm_node_list_append(&node->rights, target);
|
4651
4670
|
}
|
4671
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4672
|
+
if (node->rest == NULL) {
|
4673
|
+
node->rest = target;
|
4674
|
+
} else {
|
4675
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4676
|
+
pm_node_list_append(&node->rights, target);
|
4677
|
+
}
|
4652
4678
|
} else if (node->rest == NULL) {
|
4653
4679
|
pm_node_list_append(&node->lefts, target);
|
4654
4680
|
} else {
|
@@ -7172,7 +7198,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
7172
7198
|
static pm_token_type_t
|
7173
7199
|
lex_global_variable(pm_parser_t *parser) {
|
7174
7200
|
if (parser->current.end >= parser->end) {
|
7175
|
-
|
7201
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7176
7202
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
7177
7203
|
}
|
7178
7204
|
|
@@ -7213,7 +7239,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7213
7239
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
7214
7240
|
|
7215
7241
|
// $0 isn't allowed to be followed by anything.
|
7216
|
-
|
7242
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7217
7243
|
}
|
7218
7244
|
|
7219
7245
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7244,7 +7270,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7244
7270
|
} else {
|
7245
7271
|
// If we get here, then we have a $ followed by something that isn't
|
7246
7272
|
// recognized as a global variable.
|
7247
|
-
|
7273
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7248
7274
|
}
|
7249
7275
|
|
7250
7276
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -8148,10 +8174,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
8148
8174
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
8149
8175
|
parser->current.end += width;
|
8150
8176
|
}
|
8151
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
8152
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
8153
8177
|
} else {
|
8154
|
-
|
8178
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8179
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8180
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8155
8181
|
}
|
8156
8182
|
|
8157
8183
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -9711,7 +9737,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9711
9737
|
// and then find the first one.
|
9712
9738
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9713
9739
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9714
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9740
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9715
9741
|
|
9716
9742
|
// If we haven't found an escape yet, then this buffer will be
|
9717
9743
|
// unallocated since we can refer directly to the source string.
|
@@ -9720,7 +9746,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9720
9746
|
while (breakpoint != NULL) {
|
9721
9747
|
// If we hit a null byte, skip directly past it.
|
9722
9748
|
if (*breakpoint == '\0') {
|
9723
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9749
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9724
9750
|
continue;
|
9725
9751
|
}
|
9726
9752
|
|
@@ -9739,7 +9765,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9739
9765
|
// we need to continue on past it.
|
9740
9766
|
if (lex_mode->as.list.nesting > 0) {
|
9741
9767
|
parser->current.end = breakpoint + 1;
|
9742
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9768
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9743
9769
|
lex_mode->as.list.nesting--;
|
9744
9770
|
continue;
|
9745
9771
|
}
|
@@ -9824,7 +9850,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9824
9850
|
}
|
9825
9851
|
|
9826
9852
|
token_buffer.cursor = parser->current.end;
|
9827
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9853
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9828
9854
|
continue;
|
9829
9855
|
}
|
9830
9856
|
|
@@ -9837,7 +9863,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9837
9863
|
// that looked like an interpolated class or instance variable
|
9838
9864
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9839
9865
|
// to the next breakpoint.
|
9840
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9866
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9841
9867
|
continue;
|
9842
9868
|
}
|
9843
9869
|
|
@@ -9852,7 +9878,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9852
9878
|
// and find the next breakpoint.
|
9853
9879
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9854
9880
|
parser->current.end = breakpoint + 1;
|
9855
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9881
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9856
9882
|
lex_mode->as.list.nesting++;
|
9857
9883
|
continue;
|
9858
9884
|
}
|
@@ -9891,14 +9917,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9891
9917
|
// regular expression. We'll use strpbrk to find the first of these
|
9892
9918
|
// characters.
|
9893
9919
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9894
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9920
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9895
9921
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9896
9922
|
|
9897
9923
|
while (breakpoint != NULL) {
|
9898
9924
|
// If we hit a null byte, skip directly past it.
|
9899
9925
|
if (*breakpoint == '\0') {
|
9900
9926
|
parser->current.end = breakpoint + 1;
|
9901
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9927
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9902
9928
|
continue;
|
9903
9929
|
}
|
9904
9930
|
|
@@ -9920,7 +9946,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9920
9946
|
// If the terminator is not a newline, then we can set
|
9921
9947
|
// the next breakpoint and continue.
|
9922
9948
|
parser->current.end = breakpoint + 1;
|
9923
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9949
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9924
9950
|
continue;
|
9925
9951
|
}
|
9926
9952
|
}
|
@@ -9930,7 +9956,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9930
9956
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9931
9957
|
if (lex_mode->as.regexp.nesting > 0) {
|
9932
9958
|
parser->current.end = breakpoint + 1;
|
9933
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9959
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9934
9960
|
lex_mode->as.regexp.nesting--;
|
9935
9961
|
continue;
|
9936
9962
|
}
|
@@ -10029,7 +10055,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10029
10055
|
}
|
10030
10056
|
|
10031
10057
|
token_buffer.cursor = parser->current.end;
|
10032
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10058
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10033
10059
|
continue;
|
10034
10060
|
}
|
10035
10061
|
|
@@ -10042,7 +10068,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10042
10068
|
// something that looked like an interpolated class or
|
10043
10069
|
// instance variable like "#@" but wasn't actually. In
|
10044
10070
|
// this case we'll just skip to the next breakpoint.
|
10045
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10071
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10046
10072
|
continue;
|
10047
10073
|
}
|
10048
10074
|
|
@@ -10057,7 +10083,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10057
10083
|
// and find the next breakpoint.
|
10058
10084
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
10059
10085
|
parser->current.end = breakpoint + 1;
|
10060
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10086
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10061
10087
|
lex_mode->as.regexp.nesting++;
|
10062
10088
|
continue;
|
10063
10089
|
}
|
@@ -10093,7 +10119,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10093
10119
|
// string. We'll use strpbrk to find the first of these characters.
|
10094
10120
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10095
10121
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
10096
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10122
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10097
10123
|
|
10098
10124
|
// If we haven't found an escape yet, then this buffer will be
|
10099
10125
|
// unallocated since we can refer directly to the source string.
|
@@ -10105,7 +10131,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10131
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
10106
10132
|
lex_mode->as.string.nesting++;
|
10107
10133
|
parser->current.end = breakpoint + 1;
|
10108
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10134
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10109
10135
|
continue;
|
10110
10136
|
}
|
10111
10137
|
|
@@ -10117,7 +10143,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10117
10143
|
// to continue on past it.
|
10118
10144
|
if (lex_mode->as.string.nesting > 0) {
|
10119
10145
|
parser->current.end = breakpoint + 1;
|
10120
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10146
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10121
10147
|
lex_mode->as.string.nesting--;
|
10122
10148
|
continue;
|
10123
10149
|
}
|
@@ -10159,7 +10185,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10159
10185
|
if (parser->heredoc_end == NULL) {
|
10160
10186
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
10161
10187
|
parser->current.end = breakpoint + 1;
|
10162
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10188
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10163
10189
|
continue;
|
10164
10190
|
} else {
|
10165
10191
|
parser->current.end = breakpoint + 1;
|
@@ -10173,7 +10199,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10173
10199
|
case '\0':
|
10174
10200
|
// Skip directly past the null character.
|
10175
10201
|
parser->current.end = breakpoint + 1;
|
10176
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10202
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10177
10203
|
break;
|
10178
10204
|
case '\\': {
|
10179
10205
|
// Here we hit escapes.
|
@@ -10242,7 +10268,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10242
10268
|
}
|
10243
10269
|
|
10244
10270
|
token_buffer.cursor = parser->current.end;
|
10245
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10271
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10246
10272
|
break;
|
10247
10273
|
}
|
10248
10274
|
case '#': {
|
@@ -10253,7 +10279,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10253
10279
|
// looked like an interpolated class or instance variable like "#@"
|
10254
10280
|
// but wasn't actually. In this case we'll just skip to the next
|
10255
10281
|
// breakpoint.
|
10256
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10282
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10257
10283
|
break;
|
10258
10284
|
}
|
10259
10285
|
|
@@ -10381,7 +10407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10381
10407
|
breakpoints[2] = '\0';
|
10382
10408
|
}
|
10383
10409
|
|
10384
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10410
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10385
10411
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
10386
10412
|
bool was_escaped_newline = false;
|
10387
10413
|
|
@@ -10390,7 +10416,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10390
10416
|
case '\0':
|
10391
10417
|
// Skip directly past the null character.
|
10392
10418
|
parser->current.end = breakpoint + 1;
|
10393
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10419
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10394
10420
|
break;
|
10395
10421
|
case '\n': {
|
10396
10422
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10465,7 +10491,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10465
10491
|
// Otherwise we hit a newline and it wasn't followed by
|
10466
10492
|
// a terminator, so we can continue parsing.
|
10467
10493
|
parser->current.end = breakpoint + 1;
|
10468
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10494
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10469
10495
|
break;
|
10470
10496
|
}
|
10471
10497
|
case '\\': {
|
@@ -10529,7 +10555,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10529
10555
|
}
|
10530
10556
|
|
10531
10557
|
token_buffer.cursor = parser->current.end;
|
10532
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10558
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10533
10559
|
break;
|
10534
10560
|
}
|
10535
10561
|
case '#': {
|
@@ -10541,7 +10567,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10541
10567
|
// or instance variable like "#@" but wasn't
|
10542
10568
|
// actually. In this case we'll just skip to the
|
10543
10569
|
// next breakpoint.
|
10544
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10570
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10545
10571
|
break;
|
10546
10572
|
}
|
10547
10573
|
|
@@ -11054,7 +11080,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
11054
11080
|
return target;
|
11055
11081
|
case PM_BACK_REFERENCE_READ_NODE:
|
11056
11082
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11057
|
-
|
11083
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11058
11084
|
return target;
|
11059
11085
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
11060
11086
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -11192,7 +11218,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
11192
11218
|
}
|
11193
11219
|
case PM_BACK_REFERENCE_READ_NODE:
|
11194
11220
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11195
|
-
|
11221
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11196
11222
|
/* fallthrough */
|
11197
11223
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11198
11224
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -11367,7 +11393,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
11367
11393
|
pm_multi_target_node_targets_append(parser, result, target);
|
11368
11394
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
11369
11395
|
// If we get here, then we have a trailing , in a multi target node.
|
11370
|
-
// We'll
|
11396
|
+
// We'll add an implicit rest node to represent this.
|
11371
11397
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
11372
11398
|
pm_multi_target_node_targets_append(parser, result, rest);
|
11373
11399
|
break;
|
@@ -11457,8 +11483,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11457
11483
|
|
11458
11484
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11459
11485
|
if (context_terminator(context, &parser->current)) break;
|
11460
|
-
} else {
|
11461
|
-
|
11486
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11487
|
+
// This is an inlined version of accept1 because the error that we
|
11488
|
+
// want to add has varargs. If this happens again, we should
|
11489
|
+
// probably extract a helper function.
|
11490
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11491
|
+
parser->previous.start = parser->previous.end;
|
11492
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11462
11493
|
}
|
11463
11494
|
}
|
11464
11495
|
|
@@ -13852,7 +13883,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13852
13883
|
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13853
13884
|
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13854
13885
|
} else {
|
13855
|
-
|
13886
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13856
13887
|
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13857
13888
|
}
|
13858
13889
|
}
|
@@ -14161,7 +14192,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14161
14192
|
parser_lex(parser);
|
14162
14193
|
|
14163
14194
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14164
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14195
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14165
14196
|
// If we get here, then we have an end immediately after a
|
14166
14197
|
// start. In that case we'll create an empty content token and
|
14167
14198
|
// return an uninterpolated string.
|
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14218
14249
|
parser_lex(parser);
|
14219
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
14220
14251
|
|
14221
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14222
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
14223
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
14224
14255
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14225
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
14226
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
14227
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14228
14261
|
} else {
|
14229
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
14230
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14231
14266
|
}
|
14232
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14241
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14242
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14243
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
14244
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14245
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
14246
14281
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14247
14282
|
} else {
|
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14332
14367
|
return current;
|
14333
14368
|
}
|
14334
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
14335
14393
|
/**
|
14336
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
14337
14395
|
*/
|
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14516
14574
|
// If we didn't find a terminator and we didn't find a right
|
14517
14575
|
// parenthesis, then this is a syntax error.
|
14518
14576
|
if (!terminator_found) {
|
14519
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14520
14578
|
}
|
14521
14579
|
|
14522
14580
|
// Parse each statement within the parentheses.
|
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14545
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14546
14604
|
break;
|
14547
14605
|
} else {
|
14548
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14549
14607
|
}
|
14550
14608
|
}
|
14551
14609
|
|
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15626
15684
|
* methods to override the unary operators, we should ignore
|
15627
15685
|
* the @ in the same way we do for symbols.
|
15628
15686
|
*/
|
15629
|
-
name.
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15630
15688
|
|
15631
15689
|
return (pm_node_t *) pm_def_node_create(
|
15632
15690
|
parser,
|
15691
|
+
name_id,
|
15633
15692
|
&name,
|
15634
15693
|
receiver,
|
15635
15694
|
params,
|
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16458
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
16459
16518
|
// still lex past it though and create a missing node place.
|
16460
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16461
|
-
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
16462
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16463
16522
|
}
|
16464
16523
|
|
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16481
16540
|
parser_lex(parser);
|
16482
16541
|
|
16483
16542
|
pm_token_t operator = parser->previous;
|
16484
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
16485
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
16486
16545
|
|
16487
16546
|
pm_conditional_predicate(receiver);
|
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16491
16550
|
parser_lex(parser);
|
16492
16551
|
|
16493
16552
|
pm_token_t operator = parser->previous;
|
16494
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16495
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
16496
16555
|
|
16497
16556
|
return (pm_node_t *) node;
|
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16500
16559
|
parser_lex(parser);
|
16501
16560
|
|
16502
16561
|
pm_token_t operator = parser->previous;
|
16503
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16504
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
16505
16564
|
|
16506
16565
|
return (pm_node_t *) node;
|
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16509
16568
|
parser_lex(parser);
|
16510
16569
|
|
16511
16570
|
pm_token_t operator = parser->previous;
|
16512
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16513
16572
|
|
16514
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
16515
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16625
16684
|
parser_lex(parser);
|
16626
16685
|
|
16627
16686
|
pm_token_t operator = parser->previous;
|
16628
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16629
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16630
16689
|
|
16631
16690
|
return (pm_node_t *) node;
|
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16648
16707
|
// here because it will provide more context in addition to the
|
16649
16708
|
// recoverable error that we will also add.
|
16650
16709
|
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16651
|
-
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16652
16711
|
}
|
16653
16712
|
|
16654
16713
|
// If we get here, then we are assuming this token is closing a
|
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16661
16720
|
// have an unexpected token.
|
16662
16721
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16663
16722
|
} else {
|
16664
|
-
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16665
16724
|
}
|
16666
16725
|
|
16667
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
@@ -16895,7 +16954,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16895
16954
|
switch (PM_NODE_TYPE(node)) {
|
16896
16955
|
case PM_BACK_REFERENCE_READ_NODE:
|
16897
16956
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16898
|
-
|
16957
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16899
16958
|
/* fallthrough */
|
16900
16959
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16901
16960
|
parser_lex(parser);
|
@@ -17006,7 +17065,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17006
17065
|
switch (PM_NODE_TYPE(node)) {
|
17007
17066
|
case PM_BACK_REFERENCE_READ_NODE:
|
17008
17067
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17009
|
-
|
17068
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17010
17069
|
/* fallthrough */
|
17011
17070
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17012
17071
|
parser_lex(parser);
|
@@ -17127,7 +17186,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17127
17186
|
switch (PM_NODE_TYPE(node)) {
|
17128
17187
|
case PM_BACK_REFERENCE_READ_NODE:
|
17129
17188
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17130
|
-
|
17189
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17131
17190
|
/* fallthrough */
|
17132
17191
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17133
17192
|
parser_lex(parser);
|
@@ -17791,6 +17850,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17791
17850
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17792
17851
|
.next_start = NULL,
|
17793
17852
|
.heredoc_end = NULL,
|
17853
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17794
17854
|
.comment_list = { 0 },
|
17795
17855
|
.magic_comment_list = { 0 },
|
17796
17856
|
.warning_list = { 0 },
|