prism 0.22.0 → 0.23.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +1 -1
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +44 -15
- data/lib/prism/serialize.rb +12 -6
- data/lib/prism/translation/parser.rb +10 -9
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +4 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +124 -64
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/src/prism.c
CHANGED
@@ -492,7 +492,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
492
492
|
/**
|
493
493
|
* Append an error to the list of errors on the parser using a format string.
|
494
494
|
*/
|
495
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
495
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
496
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
496
497
|
|
497
498
|
/**
|
498
499
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -507,7 +508,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
507
508
|
* Append an error to the list of errors on the parser using the given location
|
508
509
|
* using a format string.
|
509
510
|
*/
|
510
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
511
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
512
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
511
513
|
|
512
514
|
/**
|
513
515
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -522,7 +524,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
522
524
|
* Append an error to the list of errors on the parser using the location of the
|
523
525
|
* given node and a format string.
|
524
526
|
*/
|
525
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
527
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
528
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
529
|
+
|
530
|
+
/**
|
531
|
+
* Append an error to the list of errors on the parser using the location of the
|
532
|
+
* given node and a format string, and add on the content of the node.
|
533
|
+
*/
|
534
|
+
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
|
535
|
+
PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
|
526
536
|
|
527
537
|
/**
|
528
538
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -546,7 +556,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
546
556
|
* Append an error to the list of errors on the parser using the location of the
|
547
557
|
* given token and a format string.
|
548
558
|
*/
|
549
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
559
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
560
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
561
|
+
|
562
|
+
/**
|
563
|
+
* Append an error to the list of errors on the parser using the location of the
|
564
|
+
* given token and a format string, and add on the content of the token.
|
565
|
+
*/
|
566
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
|
567
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
|
550
568
|
|
551
569
|
/**
|
552
570
|
* Append a warning to the list of warnings on the parser.
|
@@ -2890,7 +2908,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
2890
2908
|
static pm_def_node_t *
|
2891
2909
|
pm_def_node_create(
|
2892
2910
|
pm_parser_t *parser,
|
2893
|
-
|
2911
|
+
pm_constant_id_t name,
|
2912
|
+
const pm_token_t *name_loc,
|
2894
2913
|
pm_node_t *receiver,
|
2895
2914
|
pm_parameters_node_t *parameters,
|
2896
2915
|
pm_node_t *body,
|
@@ -2920,8 +2939,8 @@ pm_def_node_create(
|
|
2920
2939
|
.type = PM_DEF_NODE,
|
2921
2940
|
.location = { .start = def_keyword->start, .end = end },
|
2922
2941
|
},
|
2923
|
-
.name =
|
2924
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2942
|
+
.name = name,
|
2943
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2925
2944
|
.receiver = receiver,
|
2926
2945
|
.parameters = parameters,
|
2927
2946
|
.body = body,
|
@@ -4642,13 +4661,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4642
4661
|
*/
|
4643
4662
|
static void
|
4644
4663
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4645
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4664
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4646
4665
|
if (node->rest == NULL) {
|
4647
4666
|
node->rest = target;
|
4648
4667
|
} else {
|
4649
4668
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4650
4669
|
pm_node_list_append(&node->rights, target);
|
4651
4670
|
}
|
4671
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4672
|
+
if (node->rest == NULL) {
|
4673
|
+
node->rest = target;
|
4674
|
+
} else {
|
4675
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4676
|
+
pm_node_list_append(&node->rights, target);
|
4677
|
+
}
|
4652
4678
|
} else if (node->rest == NULL) {
|
4653
4679
|
pm_node_list_append(&node->lefts, target);
|
4654
4680
|
} else {
|
@@ -7172,7 +7198,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
7172
7198
|
static pm_token_type_t
|
7173
7199
|
lex_global_variable(pm_parser_t *parser) {
|
7174
7200
|
if (parser->current.end >= parser->end) {
|
7175
|
-
|
7201
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7176
7202
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
7177
7203
|
}
|
7178
7204
|
|
@@ -7213,7 +7239,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7213
7239
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
7214
7240
|
|
7215
7241
|
// $0 isn't allowed to be followed by anything.
|
7216
|
-
|
7242
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7217
7243
|
}
|
7218
7244
|
|
7219
7245
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7244,7 +7270,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
7244
7270
|
} else {
|
7245
7271
|
// If we get here, then we have a $ followed by something that isn't
|
7246
7272
|
// recognized as a global variable.
|
7247
|
-
|
7273
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
7248
7274
|
}
|
7249
7275
|
|
7250
7276
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -8148,10 +8174,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
8148
8174
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
8149
8175
|
parser->current.end += width;
|
8150
8176
|
}
|
8151
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
8152
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
8153
8177
|
} else {
|
8154
|
-
|
8178
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8179
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8180
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8155
8181
|
}
|
8156
8182
|
|
8157
8183
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -9711,7 +9737,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9711
9737
|
// and then find the first one.
|
9712
9738
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9713
9739
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9714
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9740
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9715
9741
|
|
9716
9742
|
// If we haven't found an escape yet, then this buffer will be
|
9717
9743
|
// unallocated since we can refer directly to the source string.
|
@@ -9720,7 +9746,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9720
9746
|
while (breakpoint != NULL) {
|
9721
9747
|
// If we hit a null byte, skip directly past it.
|
9722
9748
|
if (*breakpoint == '\0') {
|
9723
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9749
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9724
9750
|
continue;
|
9725
9751
|
}
|
9726
9752
|
|
@@ -9739,7 +9765,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9739
9765
|
// we need to continue on past it.
|
9740
9766
|
if (lex_mode->as.list.nesting > 0) {
|
9741
9767
|
parser->current.end = breakpoint + 1;
|
9742
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9768
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9743
9769
|
lex_mode->as.list.nesting--;
|
9744
9770
|
continue;
|
9745
9771
|
}
|
@@ -9824,7 +9850,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9824
9850
|
}
|
9825
9851
|
|
9826
9852
|
token_buffer.cursor = parser->current.end;
|
9827
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9853
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9828
9854
|
continue;
|
9829
9855
|
}
|
9830
9856
|
|
@@ -9837,7 +9863,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9837
9863
|
// that looked like an interpolated class or instance variable
|
9838
9864
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9839
9865
|
// to the next breakpoint.
|
9840
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9866
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9841
9867
|
continue;
|
9842
9868
|
}
|
9843
9869
|
|
@@ -9852,7 +9878,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9852
9878
|
// and find the next breakpoint.
|
9853
9879
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9854
9880
|
parser->current.end = breakpoint + 1;
|
9855
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9881
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9856
9882
|
lex_mode->as.list.nesting++;
|
9857
9883
|
continue;
|
9858
9884
|
}
|
@@ -9891,14 +9917,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9891
9917
|
// regular expression. We'll use strpbrk to find the first of these
|
9892
9918
|
// characters.
|
9893
9919
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9894
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9920
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9895
9921
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9896
9922
|
|
9897
9923
|
while (breakpoint != NULL) {
|
9898
9924
|
// If we hit a null byte, skip directly past it.
|
9899
9925
|
if (*breakpoint == '\0') {
|
9900
9926
|
parser->current.end = breakpoint + 1;
|
9901
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9927
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9902
9928
|
continue;
|
9903
9929
|
}
|
9904
9930
|
|
@@ -9920,7 +9946,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9920
9946
|
// If the terminator is not a newline, then we can set
|
9921
9947
|
// the next breakpoint and continue.
|
9922
9948
|
parser->current.end = breakpoint + 1;
|
9923
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9949
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9924
9950
|
continue;
|
9925
9951
|
}
|
9926
9952
|
}
|
@@ -9930,7 +9956,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9930
9956
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9931
9957
|
if (lex_mode->as.regexp.nesting > 0) {
|
9932
9958
|
parser->current.end = breakpoint + 1;
|
9933
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9959
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9934
9960
|
lex_mode->as.regexp.nesting--;
|
9935
9961
|
continue;
|
9936
9962
|
}
|
@@ -10029,7 +10055,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10029
10055
|
}
|
10030
10056
|
|
10031
10057
|
token_buffer.cursor = parser->current.end;
|
10032
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10058
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10033
10059
|
continue;
|
10034
10060
|
}
|
10035
10061
|
|
@@ -10042,7 +10068,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10042
10068
|
// something that looked like an interpolated class or
|
10043
10069
|
// instance variable like "#@" but wasn't actually. In
|
10044
10070
|
// this case we'll just skip to the next breakpoint.
|
10045
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10071
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10046
10072
|
continue;
|
10047
10073
|
}
|
10048
10074
|
|
@@ -10057,7 +10083,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10057
10083
|
// and find the next breakpoint.
|
10058
10084
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
10059
10085
|
parser->current.end = breakpoint + 1;
|
10060
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10086
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
10061
10087
|
lex_mode->as.regexp.nesting++;
|
10062
10088
|
continue;
|
10063
10089
|
}
|
@@ -10093,7 +10119,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10093
10119
|
// string. We'll use strpbrk to find the first of these characters.
|
10094
10120
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10095
10121
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
10096
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10122
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10097
10123
|
|
10098
10124
|
// If we haven't found an escape yet, then this buffer will be
|
10099
10125
|
// unallocated since we can refer directly to the source string.
|
@@ -10105,7 +10131,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10131
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
10106
10132
|
lex_mode->as.string.nesting++;
|
10107
10133
|
parser->current.end = breakpoint + 1;
|
10108
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10134
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10109
10135
|
continue;
|
10110
10136
|
}
|
10111
10137
|
|
@@ -10117,7 +10143,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10117
10143
|
// to continue on past it.
|
10118
10144
|
if (lex_mode->as.string.nesting > 0) {
|
10119
10145
|
parser->current.end = breakpoint + 1;
|
10120
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10146
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10121
10147
|
lex_mode->as.string.nesting--;
|
10122
10148
|
continue;
|
10123
10149
|
}
|
@@ -10159,7 +10185,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10159
10185
|
if (parser->heredoc_end == NULL) {
|
10160
10186
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
10161
10187
|
parser->current.end = breakpoint + 1;
|
10162
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10188
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10163
10189
|
continue;
|
10164
10190
|
} else {
|
10165
10191
|
parser->current.end = breakpoint + 1;
|
@@ -10173,7 +10199,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10173
10199
|
case '\0':
|
10174
10200
|
// Skip directly past the null character.
|
10175
10201
|
parser->current.end = breakpoint + 1;
|
10176
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10202
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10177
10203
|
break;
|
10178
10204
|
case '\\': {
|
10179
10205
|
// Here we hit escapes.
|
@@ -10242,7 +10268,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10242
10268
|
}
|
10243
10269
|
|
10244
10270
|
token_buffer.cursor = parser->current.end;
|
10245
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10271
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10246
10272
|
break;
|
10247
10273
|
}
|
10248
10274
|
case '#': {
|
@@ -10253,7 +10279,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10253
10279
|
// looked like an interpolated class or instance variable like "#@"
|
10254
10280
|
// but wasn't actually. In this case we'll just skip to the next
|
10255
10281
|
// breakpoint.
|
10256
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10282
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10257
10283
|
break;
|
10258
10284
|
}
|
10259
10285
|
|
@@ -10381,7 +10407,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10381
10407
|
breakpoints[2] = '\0';
|
10382
10408
|
}
|
10383
10409
|
|
10384
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10410
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10385
10411
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
10386
10412
|
bool was_escaped_newline = false;
|
10387
10413
|
|
@@ -10390,7 +10416,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10390
10416
|
case '\0':
|
10391
10417
|
// Skip directly past the null character.
|
10392
10418
|
parser->current.end = breakpoint + 1;
|
10393
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10419
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10394
10420
|
break;
|
10395
10421
|
case '\n': {
|
10396
10422
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10465,7 +10491,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10465
10491
|
// Otherwise we hit a newline and it wasn't followed by
|
10466
10492
|
// a terminator, so we can continue parsing.
|
10467
10493
|
parser->current.end = breakpoint + 1;
|
10468
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10494
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10469
10495
|
break;
|
10470
10496
|
}
|
10471
10497
|
case '\\': {
|
@@ -10529,7 +10555,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10529
10555
|
}
|
10530
10556
|
|
10531
10557
|
token_buffer.cursor = parser->current.end;
|
10532
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10558
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10533
10559
|
break;
|
10534
10560
|
}
|
10535
10561
|
case '#': {
|
@@ -10541,7 +10567,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10541
10567
|
// or instance variable like "#@" but wasn't
|
10542
10568
|
// actually. In this case we'll just skip to the
|
10543
10569
|
// next breakpoint.
|
10544
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10570
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10545
10571
|
break;
|
10546
10572
|
}
|
10547
10573
|
|
@@ -11054,7 +11080,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
11054
11080
|
return target;
|
11055
11081
|
case PM_BACK_REFERENCE_READ_NODE:
|
11056
11082
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11057
|
-
|
11083
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11058
11084
|
return target;
|
11059
11085
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
11060
11086
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -11192,7 +11218,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
11192
11218
|
}
|
11193
11219
|
case PM_BACK_REFERENCE_READ_NODE:
|
11194
11220
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
11195
|
-
|
11221
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
11196
11222
|
/* fallthrough */
|
11197
11223
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
11198
11224
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -11367,7 +11393,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
11367
11393
|
pm_multi_target_node_targets_append(parser, result, target);
|
11368
11394
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
11369
11395
|
// If we get here, then we have a trailing , in a multi target node.
|
11370
|
-
// We'll
|
11396
|
+
// We'll add an implicit rest node to represent this.
|
11371
11397
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
11372
11398
|
pm_multi_target_node_targets_append(parser, result, rest);
|
11373
11399
|
break;
|
@@ -11457,8 +11483,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11457
11483
|
|
11458
11484
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11459
11485
|
if (context_terminator(context, &parser->current)) break;
|
11460
|
-
} else {
|
11461
|
-
|
11486
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11487
|
+
// This is an inlined version of expect1 because the error that we
|
11488
|
+
// want to add has varargs. If this happens again, we should
|
11489
|
+
// probably extract a helper function.
|
11490
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11491
|
+
parser->previous.start = parser->previous.end;
|
11492
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11462
11493
|
}
|
11463
11494
|
}
|
11464
11495
|
|
@@ -13852,7 +13883,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13852
13883
|
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13853
13884
|
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13854
13885
|
} else {
|
13855
|
-
|
13886
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13856
13887
|
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13857
13888
|
}
|
13858
13889
|
}
|
@@ -14161,7 +14192,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14161
14192
|
parser_lex(parser);
|
14162
14193
|
|
14163
14194
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14164
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14195
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14165
14196
|
// If we get here, then we have an end immediately after a
|
14166
14197
|
// start. In that case we'll create an empty content token and
|
14167
14198
|
// return an uninterpolated string.
|
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14218
14249
|
parser_lex(parser);
|
14219
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
14220
14251
|
|
14221
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14222
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
14223
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
14224
14255
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14225
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
14226
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
14227
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14228
14261
|
} else {
|
14229
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
14230
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14231
14266
|
}
|
14232
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14241
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
14242
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14243
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
14244
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
14245
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
14246
14281
|
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
14247
14282
|
} else {
|
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
14332
14367
|
return current;
|
14333
14368
|
}
|
14334
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
14335
14393
|
/**
|
14336
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
14337
14395
|
*/
|
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14516
14574
|
// If we didn't find a terminator and we didn't find a right
|
14517
14575
|
// parenthesis, then this is a syntax error.
|
14518
14576
|
if (!terminator_found) {
|
14519
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14520
14578
|
}
|
14521
14579
|
|
14522
14580
|
// Parse each statement within the parentheses.
|
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14545
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14546
14604
|
break;
|
14547
14605
|
} else {
|
14548
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14549
14607
|
}
|
14550
14608
|
}
|
14551
14609
|
|
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15626
15684
|
* methods to override the unary operators, we should ignore
|
15627
15685
|
* the @ in the same way we do for symbols.
|
15628
15686
|
*/
|
15629
|
-
name.
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15630
15688
|
|
15631
15689
|
return (pm_node_t *) pm_def_node_create(
|
15632
15690
|
parser,
|
15691
|
+
name_id,
|
15633
15692
|
&name,
|
15634
15693
|
receiver,
|
15635
15694
|
params,
|
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16458
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
16459
16518
|
// still lex past it though and create a missing node in its place.
|
16460
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16461
|
-
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
16462
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16463
16522
|
}
|
16464
16523
|
|
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16481
16540
|
parser_lex(parser);
|
16482
16541
|
|
16483
16542
|
pm_token_t operator = parser->previous;
|
16484
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
16485
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
16486
16545
|
|
16487
16546
|
pm_conditional_predicate(receiver);
|
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16491
16550
|
parser_lex(parser);
|
16492
16551
|
|
16493
16552
|
pm_token_t operator = parser->previous;
|
16494
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16495
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
16496
16555
|
|
16497
16556
|
return (pm_node_t *) node;
|
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16500
16559
|
parser_lex(parser);
|
16501
16560
|
|
16502
16561
|
pm_token_t operator = parser->previous;
|
16503
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16504
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
16505
16564
|
|
16506
16565
|
return (pm_node_t *) node;
|
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16509
16568
|
parser_lex(parser);
|
16510
16569
|
|
16511
16570
|
pm_token_t operator = parser->previous;
|
16512
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16513
16572
|
|
16514
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
16515
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16625
16684
|
parser_lex(parser);
|
16626
16685
|
|
16627
16686
|
pm_token_t operator = parser->previous;
|
16628
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16629
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16630
16689
|
|
16631
16690
|
return (pm_node_t *) node;
|
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16648
16707
|
// here because it will provide more context in addition to the
|
16649
16708
|
// recoverable error that we will also add.
|
16650
16709
|
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16651
|
-
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16652
16711
|
}
|
16653
16712
|
|
16654
16713
|
// If we get here, then we are assuming this token is closing a
|
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16661
16720
|
// have an unexpected token.
|
16662
16721
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16663
16722
|
} else {
|
16664
|
-
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16665
16724
|
}
|
16666
16725
|
|
16667
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
@@ -16895,7 +16954,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16895
16954
|
switch (PM_NODE_TYPE(node)) {
|
16896
16955
|
case PM_BACK_REFERENCE_READ_NODE:
|
16897
16956
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16898
|
-
|
16957
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16899
16958
|
/* fallthrough */
|
16900
16959
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16901
16960
|
parser_lex(parser);
|
@@ -17006,7 +17065,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17006
17065
|
switch (PM_NODE_TYPE(node)) {
|
17007
17066
|
case PM_BACK_REFERENCE_READ_NODE:
|
17008
17067
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17009
|
-
|
17068
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17010
17069
|
/* fallthrough */
|
17011
17070
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17012
17071
|
parser_lex(parser);
|
@@ -17127,7 +17186,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17127
17186
|
switch (PM_NODE_TYPE(node)) {
|
17128
17187
|
case PM_BACK_REFERENCE_READ_NODE:
|
17129
17188
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
17130
|
-
|
17189
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
17131
17190
|
/* fallthrough */
|
17132
17191
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
17133
17192
|
parser_lex(parser);
|
@@ -17791,6 +17850,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17791
17850
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17792
17851
|
.next_start = NULL,
|
17793
17852
|
.heredoc_end = NULL,
|
17853
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17794
17854
|
.comment_list = { 0 },
|
17795
17855
|
.magic_comment_list = { 0 },
|
17796
17856
|
.warning_list = { 0 },
|