prism 0.30.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -1
  3. data/README.md +3 -1
  4. data/config.yml +185 -126
  5. data/docs/serialization.md +3 -0
  6. data/ext/prism/api_node.c +2843 -2085
  7. data/ext/prism/extconf.rb +1 -1
  8. data/ext/prism/extension.c +35 -25
  9. data/ext/prism/extension.h +2 -2
  10. data/include/prism/ast.h +1048 -69
  11. data/include/prism/defines.h +9 -0
  12. data/include/prism/diagnostic.h +11 -3
  13. data/include/prism/options.h +55 -1
  14. data/include/prism/parser.h +27 -3
  15. data/include/prism/regexp.h +2 -1
  16. data/include/prism/util/pm_integer.h +6 -6
  17. data/include/prism/util/pm_newline_list.h +11 -0
  18. data/include/prism/util/pm_string.h +1 -0
  19. data/include/prism/version.h +3 -3
  20. data/lib/prism/desugar_compiler.rb +111 -74
  21. data/lib/prism/dispatcher.rb +2 -1
  22. data/lib/prism/dot_visitor.rb +21 -31
  23. data/lib/prism/dsl.rb +656 -471
  24. data/lib/prism/ffi.rb +3 -0
  25. data/lib/prism/inspect_visitor.rb +285 -57
  26. data/lib/prism/mutation_compiler.rb +5 -5
  27. data/lib/prism/node.rb +2282 -4754
  28. data/lib/prism/node_ext.rb +72 -11
  29. data/lib/prism/parse_result/errors.rb +65 -0
  30. data/lib/prism/parse_result/newlines.rb +28 -28
  31. data/lib/prism/parse_result.rb +25 -2
  32. data/lib/prism/reflection.rb +7 -7
  33. data/lib/prism/serialize.rb +468 -610
  34. data/lib/prism/translation/parser/compiler.rb +18 -18
  35. data/lib/prism/translation/parser/lexer.rb +1 -1
  36. data/lib/prism/translation/parser.rb +3 -3
  37. data/lib/prism/translation/ripper.rb +14 -14
  38. data/lib/prism/translation/ruby_parser.rb +43 -7
  39. data/prism.gemspec +3 -1
  40. data/rbi/prism/dsl.rbi +521 -0
  41. data/rbi/prism/node.rbi +1456 -5616
  42. data/rbi/prism.rbi +16 -16
  43. data/sig/prism/dsl.rbs +189 -305
  44. data/sig/prism/node.rbs +702 -603
  45. data/sig/prism/parse_result.rbs +2 -0
  46. data/src/diagnostic.c +22 -6
  47. data/src/node.c +277 -284
  48. data/src/options.c +18 -0
  49. data/src/prettyprint.c +99 -108
  50. data/src/prism.c +1282 -760
  51. data/src/regexp.c +72 -4
  52. data/src/serialize.c +165 -50
  53. data/src/token_type.c +2 -2
  54. data/src/util/pm_integer.c +14 -14
  55. data/src/util/pm_newline_list.c +29 -0
  56. data/src/util/pm_string.c +9 -5
  57. metadata +4 -2
data/src/regexp.c CHANGED
@@ -18,6 +18,12 @@ typedef struct {
18
18
  /** A pointer to the end of the source that we are parsing. */
19
19
  const uint8_t *end;
20
20
 
21
+ /**
22
+ * Whether or not the regular expression currently being parsed is in
23
+ * extended mode, wherein whitespace is ignored and comments are allowed.
24
+ */
25
+ bool extended_mode;
26
+
21
27
  /** Whether the encoding has changed from the default. */
22
28
  bool encoding_changed;
23
29
 
@@ -418,6 +424,19 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
418
424
  return false;
419
425
  }
420
426
 
427
+ /**
428
+ * Returns the state stored in the options for the given key, or false (0) if the key is outside the range of tracked option slots.
429
+ */
430
+ static uint8_t
431
+ pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
432
+ if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
433
+ key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
434
+ return options->values[key];
435
+ }
436
+
437
+ return false;
438
+ }
439
+
421
440
  /**
422
441
  * Groups can have quite a few different patterns for syntax. They basically
423
442
  * just wrap a set of expressions, but they can potentially have options after a
@@ -443,6 +462,9 @@ static bool
443
462
  pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
444
463
  const uint8_t *group_start = parser->cursor;
445
464
 
465
+ pm_regexp_options_t options;
466
+ pm_regexp_options_init(&options);
467
+
446
468
  // First, parse any options for the group.
447
469
  if (pm_regexp_char_accept(parser, '?')) {
448
470
  if (pm_regexp_char_is_eof(parser)) {
@@ -450,9 +472,6 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
450
472
  return false;
451
473
  }
452
474
 
453
- pm_regexp_options_t options;
454
- pm_regexp_options_init(&options);
455
-
456
475
  switch (*parser->cursor) {
457
476
  case '#': { // inline comments
458
477
  parser->cursor++;
@@ -560,6 +579,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
560
579
  return false;
561
580
  }
562
581
 
582
+ // If we are at the end of the group of options and there is no
583
+ // subexpression, then we are going to be setting the options
584
+ // for the parent group. In this case we are safe to return now.
585
+ if (*parser->cursor == ')') {
586
+ if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
587
+ parser->extended_mode = true;
588
+ }
589
+
590
+ parser->cursor++;
591
+ return true;
592
+ }
593
+
563
594
  // If we hit a -, then we're done parsing options.
564
595
  if (*parser->cursor != '-') break;
565
596
 
@@ -577,6 +608,24 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
577
608
  if (pm_regexp_char_is_eof(parser)) {
578
609
  return false;
579
610
  }
611
+
612
+ // If we are at the end of the group of options and there is no
613
+ // subexpression, then we are going to be setting the options
614
+ // for the parent group. In this case we are safe to return now.
615
+ if (*parser->cursor == ')') {
616
+ switch (pm_regexp_options_state(&options, 'x')) {
617
+ case PM_REGEXP_OPTION_STATE_ADDED:
618
+ parser->extended_mode = true;
619
+ break;
620
+ case PM_REGEXP_OPTION_STATE_REMOVED:
621
+ parser->extended_mode = false;
622
+ break;
623
+ }
624
+
625
+ parser->cursor++;
626
+ return true;
627
+ }
628
+
580
629
  break;
581
630
  default:
582
631
  parser->cursor++;
@@ -585,15 +634,27 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
585
634
  }
586
635
  }
587
636
 
637
+ bool extended_mode = parser->extended_mode;
638
+ switch (pm_regexp_options_state(&options, 'x')) {
639
+ case PM_REGEXP_OPTION_STATE_ADDED:
640
+ parser->extended_mode = true;
641
+ break;
642
+ case PM_REGEXP_OPTION_STATE_REMOVED:
643
+ parser->extended_mode = false;
644
+ break;
645
+ }
646
+
588
647
  // Now, parse the expressions within this group.
589
648
  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
590
649
  if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
650
+ parser->extended_mode = extended_mode;
591
651
  return false;
592
652
  }
593
653
  pm_regexp_char_accept(parser, '|');
594
654
  }
595
655
 
596
656
  // Finally, make sure we have a closing parenthesis.
657
+ parser->extended_mode = extended_mode;
597
658
  if (pm_regexp_char_expect(parser, ')')) return true;
598
659
 
599
660
  pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
@@ -641,6 +702,12 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
641
702
  parser->cursor++;
642
703
  pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
643
704
  return true;
705
+ case '#':
706
+ if (parser->extended_mode) {
707
+ if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
708
+ return true;
709
+ }
710
+ /* fallthrough */
644
711
  default: {
645
712
  size_t width;
646
713
  if (!parser->encoding_changed) {
@@ -702,12 +769,13 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
702
769
  * groups.
703
770
  */
704
771
  PRISM_EXPORTED_FUNCTION void
705
- pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
772
+ pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
706
773
  pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
707
774
  .parser = parser,
708
775
  .start = source,
709
776
  .cursor = source,
710
777
  .end = source + size,
778
+ .extended_mode = extended_mode,
711
779
  .encoding_changed = parser->encoding_changed,
712
780
  .encoding = parser->encoding,
713
781
  .name_callback = name_callback,