prism 0.30.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -1
- data/README.md +3 -1
- data/config.yml +185 -126
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2843 -2085
- data/ext/prism/extconf.rb +1 -1
- data/ext/prism/extension.c +35 -25
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1048 -69
- data/include/prism/defines.h +9 -0
- data/include/prism/diagnostic.h +11 -3
- data/include/prism/options.h +55 -1
- data/include/prism/parser.h +27 -3
- data/include/prism/regexp.h +2 -1
- data/include/prism/util/pm_integer.h +6 -6
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +1 -0
- data/include/prism/version.h +3 -3
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +2 -1
- data/lib/prism/dot_visitor.rb +21 -31
- data/lib/prism/dsl.rb +656 -471
- data/lib/prism/ffi.rb +3 -0
- data/lib/prism/inspect_visitor.rb +285 -57
- data/lib/prism/mutation_compiler.rb +5 -5
- data/lib/prism/node.rb +2282 -4754
- data/lib/prism/node_ext.rb +72 -11
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +28 -28
- data/lib/prism/parse_result.rb +25 -2
- data/lib/prism/reflection.rb +7 -7
- data/lib/prism/serialize.rb +468 -610
- data/lib/prism/translation/parser/compiler.rb +18 -18
- data/lib/prism/translation/parser/lexer.rb +1 -1
- data/lib/prism/translation/parser.rb +3 -3
- data/lib/prism/translation/ripper.rb +14 -14
- data/lib/prism/translation/ruby_parser.rb +43 -7
- data/prism.gemspec +3 -1
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1456 -5616
- data/rbi/prism.rbi +16 -16
- data/sig/prism/dsl.rbs +189 -305
- data/sig/prism/node.rbs +702 -603
- data/sig/prism/parse_result.rbs +2 -0
- data/src/diagnostic.c +22 -6
- data/src/node.c +277 -284
- data/src/options.c +18 -0
- data/src/prettyprint.c +99 -108
- data/src/prism.c +1282 -760
- data/src/regexp.c +72 -4
- data/src/serialize.c +165 -50
- data/src/token_type.c +2 -2
- data/src/util/pm_integer.c +14 -14
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +9 -5
- metadata +4 -2
data/src/regexp.c
CHANGED
@@ -18,6 +18,12 @@ typedef struct {
|
|
18
18
|
/** A pointer to the end of the source that we are parsing. */
|
19
19
|
const uint8_t *end;
|
20
20
|
|
21
|
+
/**
|
22
|
+
* Whether or not the regular expression currently being parsed is in
|
23
|
+
* extended mode, wherein whitespace is ignored and comments are allowed.
|
24
|
+
*/
|
25
|
+
bool extended_mode;
|
26
|
+
|
21
27
|
/** Whether the encoding has changed from the default. */
|
22
28
|
bool encoding_changed;
|
23
29
|
|
@@ -418,6 +424,19 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|
418
424
|
return false;
|
419
425
|
}
|
420
426
|
|
427
|
+
/**
|
428
|
+
* Returns the state stored in the options for the given key (callers compare it against PM_REGEXP_OPTION_STATE_ADDED or PM_REGEXP_OPTION_STATE_REMOVED), or false (0) when the key lies outside the PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM to PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM range.
|
429
|
+
*/
|
430
|
+
static uint8_t
|
431
|
+
pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
|
432
|
+
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
433
|
+
key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
434
|
+
return options->values[key];
|
435
|
+
}
|
436
|
+
|
437
|
+
return false;
|
438
|
+
}
|
439
|
+
|
421
440
|
/**
|
422
441
|
* Groups can have quite a few different patterns for syntax. They basically
|
423
442
|
* just wrap a set of expressions, but they can potentially have options after a
|
@@ -443,6 +462,9 @@ static bool
|
|
443
462
|
pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
444
463
|
const uint8_t *group_start = parser->cursor;
|
445
464
|
|
465
|
+
pm_regexp_options_t options;
|
466
|
+
pm_regexp_options_init(&options);
|
467
|
+
|
446
468
|
// First, parse any options for the group.
|
447
469
|
if (pm_regexp_char_accept(parser, '?')) {
|
448
470
|
if (pm_regexp_char_is_eof(parser)) {
|
@@ -450,9 +472,6 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|
450
472
|
return false;
|
451
473
|
}
|
452
474
|
|
453
|
-
pm_regexp_options_t options;
|
454
|
-
pm_regexp_options_init(&options);
|
455
|
-
|
456
475
|
switch (*parser->cursor) {
|
457
476
|
case '#': { // inline comments
|
458
477
|
parser->cursor++;
|
@@ -560,6 +579,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|
560
579
|
return false;
|
561
580
|
}
|
562
581
|
|
582
|
+
// If we are at the end of the group of options and there is no
|
583
|
+
// subexpression, then we are going to be setting the options
|
584
|
+
// for the parent group. In this case we are safe to return now.
|
585
|
+
if (*parser->cursor == ')') {
|
586
|
+
if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
|
587
|
+
parser->extended_mode = true;
|
588
|
+
}
|
589
|
+
|
590
|
+
parser->cursor++;
|
591
|
+
return true;
|
592
|
+
}
|
593
|
+
|
563
594
|
// If we hit a -, then we're done parsing options.
|
564
595
|
if (*parser->cursor != '-') break;
|
565
596
|
|
@@ -577,6 +608,24 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|
577
608
|
if (pm_regexp_char_is_eof(parser)) {
|
578
609
|
return false;
|
579
610
|
}
|
611
|
+
|
612
|
+
// If we are at the end of the group of options and there is no
|
613
|
+
// subexpression, then we are going to be setting the options
|
614
|
+
// for the parent group. In this case we are safe to return now.
|
615
|
+
if (*parser->cursor == ')') {
|
616
|
+
switch (pm_regexp_options_state(&options, 'x')) {
|
617
|
+
case PM_REGEXP_OPTION_STATE_ADDED:
|
618
|
+
parser->extended_mode = true;
|
619
|
+
break;
|
620
|
+
case PM_REGEXP_OPTION_STATE_REMOVED:
|
621
|
+
parser->extended_mode = false;
|
622
|
+
break;
|
623
|
+
}
|
624
|
+
|
625
|
+
parser->cursor++;
|
626
|
+
return true;
|
627
|
+
}
|
628
|
+
|
580
629
|
break;
|
581
630
|
default:
|
582
631
|
parser->cursor++;
|
@@ -585,15 +634,27 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|
585
634
|
}
|
586
635
|
}
|
587
636
|
|
637
|
+
bool extended_mode = parser->extended_mode;
|
638
|
+
switch (pm_regexp_options_state(&options, 'x')) {
|
639
|
+
case PM_REGEXP_OPTION_STATE_ADDED:
|
640
|
+
parser->extended_mode = true;
|
641
|
+
break;
|
642
|
+
case PM_REGEXP_OPTION_STATE_REMOVED:
|
643
|
+
parser->extended_mode = false;
|
644
|
+
break;
|
645
|
+
}
|
646
|
+
|
588
647
|
// Now, parse the expressions within this group.
|
589
648
|
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
|
590
649
|
if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
|
650
|
+
parser->extended_mode = extended_mode;
|
591
651
|
return false;
|
592
652
|
}
|
593
653
|
pm_regexp_char_accept(parser, '|');
|
594
654
|
}
|
595
655
|
|
596
656
|
// Finally, make sure we have a closing parenthesis.
|
657
|
+
parser->extended_mode = extended_mode;
|
597
658
|
if (pm_regexp_char_expect(parser, ')')) return true;
|
598
659
|
|
599
660
|
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
|
@@ -641,6 +702,12 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
|
|
641
702
|
parser->cursor++;
|
642
703
|
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
|
643
704
|
return true;
|
705
|
+
case '#':
|
706
|
+
if (parser->extended_mode) {
|
707
|
+
if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
|
708
|
+
return true;
|
709
|
+
}
|
710
|
+
/* fallthrough */
|
644
711
|
default: {
|
645
712
|
size_t width;
|
646
713
|
if (!parser->encoding_changed) {
|
@@ -702,12 +769,13 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
|
702
769
|
* groups.
|
703
770
|
*/
|
704
771
|
PRISM_EXPORTED_FUNCTION void
|
705
|
-
pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
|
772
|
+
pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
|
706
773
|
pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
|
707
774
|
.parser = parser,
|
708
775
|
.start = source,
|
709
776
|
.cursor = source,
|
710
777
|
.end = source + size,
|
778
|
+
.extended_mode = extended_mode,
|
711
779
|
.encoding_changed = parser->encoding_changed,
|
712
780
|
.encoding = parser->encoding,
|
713
781
|
.name_callback = name_callback,
|