prism 0.15.0 → 0.16.0

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -1
  3. data/Makefile +6 -0
  4. data/README.md +2 -0
  5. data/config.yml +21 -20
  6. data/docs/configuration.md +2 -0
  7. data/docs/javascript.md +90 -0
  8. data/docs/releasing.md +27 -0
  9. data/docs/ruby_api.md +2 -0
  10. data/ext/prism/api_node.c +66 -68
  11. data/ext/prism/extension.c +73 -0
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +40 -40
  14. data/include/prism/defines.h +9 -0
  15. data/include/prism/enc/pm_encoding.h +1 -0
  16. data/include/prism/node.h +0 -17
  17. data/include/prism/parser.h +1 -0
  18. data/include/prism/prettyprint.h +15 -0
  19. data/include/prism/util/pm_buffer.h +10 -4
  20. data/include/prism/util/pm_constant_pool.h +1 -1
  21. data/include/prism/util/pm_newline_list.h +1 -1
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +11 -11
  24. data/lib/prism/compiler.rb +0 -3
  25. data/lib/prism/debug.rb +20 -6
  26. data/lib/prism/desugar_compiler.rb +1 -1
  27. data/lib/prism/dispatcher.rb +0 -14
  28. data/lib/prism/dsl.rb +8 -13
  29. data/lib/prism/ffi.rb +25 -0
  30. data/lib/prism/lex_compat.rb +1 -1
  31. data/lib/prism/mutation_compiler.rb +3 -8
  32. data/lib/prism/node.rb +123 -159
  33. data/lib/prism/node_ext.rb +23 -16
  34. data/lib/prism/parse_result.rb +21 -5
  35. data/lib/prism/pattern.rb +3 -3
  36. data/lib/prism/serialize.rb +900 -304
  37. data/lib/prism/visitor.rb +0 -3
  38. data/prism.gemspec +8 -1
  39. data/rbi/prism.rbi +7261 -0
  40. data/rbi/prism_static.rbi +182 -0
  41. data/sig/prism.rbs +4439 -0
  42. data/sig/prism_static.rbs +110 -0
  43. data/src/enc/pm_unicode.c +1 -1
  44. data/src/node.c +28 -29
  45. data/src/prettyprint.c +7674 -1647
  46. data/src/prism.c +353 -300
  47. data/src/regexp.c +2 -0
  48. data/src/serialize.c +392 -381
  49. data/src/util/pm_buffer.c +47 -12
  50. data/src/util/pm_constant_pool.c +2 -2
  51. data/src/util/pm_newline_list.c +8 -54
  52. metadata +9 -2
data/src/prism.c CHANGED
@@ -40,6 +40,7 @@ debug_context(pm_context_t context) {
40
40
  case PM_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
41
41
  case PM_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
42
42
  case PM_CONTEXT_FOR: return "FOR";
43
+ case PM_CONTEXT_FOR_INDEX: return "FOR_INDEX";
43
44
  case PM_CONTEXT_IF: return "IF";
44
45
  case PM_CONTEXT_MAIN: return "MAIN";
45
46
  case PM_CONTEXT_MODULE: return "MODULE";
@@ -80,14 +81,12 @@ debug_contexts(pm_parser_t *parser) {
80
81
  }
81
82
 
82
83
  PRISM_ATTRIBUTE_UNUSED static void
83
- debug_node(const char *message, pm_parser_t *parser, pm_node_t *node) {
84
- pm_buffer_t buffer;
85
- if (!pm_buffer_init(&buffer)) return;
86
-
87
- pm_prettyprint(parser, node, &buffer);
84
+ debug_node(const pm_parser_t *parser, const pm_node_t *node) {
85
+ pm_buffer_t output_buffer = { 0 };
86
+ pm_prettyprint(&output_buffer, parser, node);
88
87
 
89
- fprintf(stderr, "%s\n%.*s\n", message, (int) buffer.length, buffer.value);
90
- pm_buffer_free(&buffer);
88
+ fprintf(stderr, "%.*s", (int) output_buffer.length, output_buffer.value);
89
+ pm_buffer_free(&output_buffer);
91
90
  }
92
91
 
93
92
  PRISM_ATTRIBUTE_UNUSED static void
@@ -648,87 +647,6 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
648
647
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
649
648
  }
650
649
 
651
- /******************************************************************************/
652
- /* Scope node functions */
653
- /******************************************************************************/
654
-
655
- // Generate a scope node from the given node.
656
- void
657
- pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous, pm_parser_t *parser) {
658
- scope->base.type = PM_SCOPE_NODE;
659
- scope->base.location.start = node->location.start;
660
- scope->base.location.end = node->location.end;
661
-
662
- scope->previous = previous;
663
- scope->parser = parser;
664
- scope->ast_node = (pm_node_t *)node;
665
- scope->parameters = NULL;
666
- scope->body = NULL;
667
- scope->constants = NULL;
668
- if (previous) {
669
- scope->constants = previous->constants;
670
- }
671
- scope->index_lookup_table = NULL;
672
-
673
- pm_constant_id_list_init(&scope->locals);
674
-
675
- switch (PM_NODE_TYPE(node)) {
676
- case PM_BLOCK_NODE: {
677
- pm_block_node_t *cast = (pm_block_node_t *) node;
678
- if (cast->parameters) scope->parameters = cast->parameters->parameters;
679
- scope->body = cast->body;
680
- scope->locals = cast->locals;
681
- break;
682
- }
683
- case PM_CLASS_NODE: {
684
- pm_class_node_t *cast = (pm_class_node_t *) node;
685
- scope->body = cast->body;
686
- scope->locals = cast->locals;
687
- break;
688
- }
689
- case PM_DEF_NODE: {
690
- pm_def_node_t *cast = (pm_def_node_t *) node;
691
- scope->parameters = cast->parameters;
692
- scope->body = cast->body;
693
- scope->locals = cast->locals;
694
- break;
695
- }
696
- case PM_FOR_NODE: {
697
- pm_for_node_t *cast = (pm_for_node_t *)node;
698
- scope->body = (pm_node_t *)cast->statements;
699
- break;
700
- }
701
- case PM_LAMBDA_NODE: {
702
- pm_lambda_node_t *cast = (pm_lambda_node_t *) node;
703
- if (cast->parameters) scope->parameters = cast->parameters->parameters;
704
- scope->body = cast->body;
705
- scope->locals = cast->locals;
706
- break;
707
- }
708
- case PM_MODULE_NODE: {
709
- pm_module_node_t *cast = (pm_module_node_t *) node;
710
- scope->body = cast->body;
711
- scope->locals = cast->locals;
712
- break;
713
- }
714
- case PM_PROGRAM_NODE: {
715
- pm_program_node_t *cast = (pm_program_node_t *) node;
716
- scope->body = (pm_node_t *) cast->statements;
717
- scope->locals = cast->locals;
718
- break;
719
- }
720
- case PM_SINGLETON_CLASS_NODE: {
721
- pm_singleton_class_node_t *cast = (pm_singleton_class_node_t *) node;
722
- scope->body = cast->body;
723
- scope->locals = cast->locals;
724
- break;
725
- }
726
- default:
727
- assert(false && "unreachable");
728
- break;
729
- }
730
- }
731
-
732
650
  /******************************************************************************/
733
651
  /* Node creation functions */
734
652
  /******************************************************************************/
@@ -765,11 +683,15 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
765
683
  return (uint32_t) value;
766
684
  }
767
685
 
686
+ // When you have an encoding flag on a regular expression, it takes precedence
687
+ // over all of the previously set encoding flags. So we need to mask off any
688
+ // previously set encoding flags before setting the new one.
689
+ #define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
690
+
768
691
  // Parse out the options for a regular expression.
769
692
  static inline pm_node_flags_t
770
693
  pm_regular_expression_flags_create(const pm_token_t *closing) {
771
694
  pm_node_flags_t flags = 0;
772
- pm_node_flags_t mask = (uint16_t) 0xFF0F;
773
695
 
774
696
  if (closing->type == PM_TOKEN_REGEXP_END) {
775
697
  for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
@@ -779,10 +701,10 @@ pm_regular_expression_flags_create(const pm_token_t *closing) {
779
701
  case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
780
702
  case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
781
703
 
782
- case 'e': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_EUC_JP; break;
783
- case 'n': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break;
784
- case 's': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break;
785
- case 'u': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_UTF_8; break;
704
+ case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
705
+ case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
706
+ case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
707
+ case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
786
708
 
787
709
  default: assert(false && "unreachable");
788
710
  }
@@ -792,6 +714,8 @@ pm_regular_expression_flags_create(const pm_token_t *closing) {
792
714
  return flags;
793
715
  }
794
716
 
717
+ #undef PM_REGULAR_EXPRESSION_ENCODING_MASK
718
+
795
719
  // Allocate and initialize a new StatementsNode node.
796
720
  static pm_statements_node_t *
797
721
  pm_statements_node_create(pm_parser_t *parser);
@@ -2664,10 +2588,10 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
2664
2588
  },
2665
2589
  },
2666
2590
  .constant = NULL,
2667
- .kwrest = NULL,
2668
2591
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2669
2592
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2670
- .assocs = PM_EMPTY_NODE_LIST
2593
+ .elements = PM_EMPTY_NODE_LIST,
2594
+ .rest = NULL
2671
2595
  };
2672
2596
 
2673
2597
  return node;
@@ -2675,27 +2599,44 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
2675
2599
 
2676
2600
  // Allocate and initialize a new hash pattern node.
2677
2601
  static pm_hash_pattern_node_t *
2678
- pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *assocs) {
2602
+ pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
2679
2603
  pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
2680
2604
 
2605
+ const uint8_t *start;
2606
+ const uint8_t *end;
2607
+
2608
+ if (elements->size > 0) {
2609
+ if (rest) {
2610
+ start = elements->nodes[0]->location.start;
2611
+ end = rest->location.end;
2612
+ } else {
2613
+ start = elements->nodes[0]->location.start;
2614
+ end = elements->nodes[elements->size - 1]->location.end;
2615
+ }
2616
+ } else {
2617
+ assert(rest != NULL);
2618
+ start = rest->location.start;
2619
+ end = rest->location.end;
2620
+ }
2621
+
2681
2622
  *node = (pm_hash_pattern_node_t) {
2682
2623
  {
2683
2624
  .type = PM_HASH_PATTERN_NODE,
2684
2625
  .location = {
2685
- .start = assocs->nodes[0]->location.start,
2686
- .end = assocs->nodes[assocs->size - 1]->location.end
2626
+ .start = start,
2627
+ .end = end
2687
2628
  },
2688
2629
  },
2689
2630
  .constant = NULL,
2690
- .kwrest = NULL,
2691
- .assocs = PM_EMPTY_NODE_LIST,
2631
+ .elements = PM_EMPTY_NODE_LIST,
2632
+ .rest = rest,
2692
2633
  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2693
2634
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2694
2635
  };
2695
2636
 
2696
- for (size_t index = 0; index < assocs->size; index++) {
2697
- pm_node_t *assoc = assocs->nodes[index];
2698
- pm_node_list_append(&node->assocs, assoc);
2637
+ for (size_t index = 0; index < elements->size; index++) {
2638
+ pm_node_t *element = elements->nodes[index];
2639
+ pm_node_list_append(&node->elements, element);
2699
2640
  }
2700
2641
 
2701
2642
  return node;
@@ -3692,7 +3633,9 @@ pm_multi_target_node_create(pm_parser_t *parser) {
3692
3633
  .type = PM_MULTI_TARGET_NODE,
3693
3634
  .location = { .start = NULL, .end = NULL }
3694
3635
  },
3695
- .targets = PM_EMPTY_NODE_LIST,
3636
+ .lefts = PM_EMPTY_NODE_LIST,
3637
+ .rest = NULL,
3638
+ .rights = PM_EMPTY_NODE_LIST,
3696
3639
  .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3697
3640
  .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3698
3641
  };
@@ -3702,8 +3645,19 @@ pm_multi_target_node_create(pm_parser_t *parser) {
3702
3645
 
3703
3646
  // Append a target to a MultiTargetNode node.
3704
3647
  static void
3705
- pm_multi_target_node_targets_append(pm_multi_target_node_t *node, pm_node_t *target) {
3706
- pm_node_list_append(&node->targets, target);
3648
+ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
3649
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
3650
+ if (node->rest == NULL) {
3651
+ node->rest = target;
3652
+ } else {
3653
+ pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
3654
+ pm_node_list_append(&node->rights, target);
3655
+ }
3656
+ } else if (node->rest == NULL) {
3657
+ pm_node_list_append(&node->lefts, target);
3658
+ } else {
3659
+ pm_node_list_append(&node->rights, target);
3660
+ }
3707
3661
 
3708
3662
  if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
3709
3663
  node->base.location.start = target->location.start;
@@ -3714,6 +3668,20 @@ pm_multi_target_node_targets_append(pm_multi_target_node_t *node, pm_node_t *tar
3714
3668
  }
3715
3669
  }
3716
3670
 
3671
+ // Set the opening of a MultiTargetNode node.
3672
+ static void
3673
+ pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
3674
+ node->base.location.start = lparen->start;
3675
+ node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
3676
+ }
3677
+
3678
+ // Set the closing of a MultiTargetNode node.
3679
+ static void
3680
+ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
3681
+ node->base.location.end = rparen->end;
3682
+ node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
3683
+ }
3684
+
3717
3685
  // Allocate a new MultiWriteNode node.
3718
3686
  static pm_multi_write_node_t *
3719
3687
  pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
@@ -3727,7 +3695,9 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
3727
3695
  .end = value->location.end
3728
3696
  }
3729
3697
  },
3730
- .targets = target->targets,
3698
+ .lefts = target->lefts,
3699
+ .rest = target->rest,
3700
+ .rights = target->rights,
3731
3701
  .lparen_loc = target->lparen_loc,
3732
3702
  .rparen_loc = target->rparen_loc,
3733
3703
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -4154,37 +4124,6 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
4154
4124
  return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
4155
4125
  }
4156
4126
 
4157
- // Allocate a new RequiredDestructuredParameterNode node.
4158
- static pm_required_destructured_parameter_node_t *
4159
- pm_required_destructured_parameter_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4160
- pm_required_destructured_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_required_destructured_parameter_node_t);
4161
-
4162
- *node = (pm_required_destructured_parameter_node_t) {
4163
- {
4164
- .type = PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE,
4165
- .location = PM_LOCATION_TOKEN_VALUE(opening)
4166
- },
4167
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4168
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4169
- .parameters = PM_EMPTY_NODE_LIST
4170
- };
4171
-
4172
- return node;
4173
- }
4174
-
4175
- // Append a new parameter to the given RequiredDestructuredParameterNode node.
4176
- static void
4177
- pm_required_destructured_parameter_node_append_parameter(pm_required_destructured_parameter_node_t *node, pm_node_t *parameter) {
4178
- pm_node_list_append(&node->parameters, parameter);
4179
- }
4180
-
4181
- // Set the closing token of the given RequiredDestructuredParameterNode node.
4182
- static void
4183
- pm_required_destructured_parameter_node_closing_set(pm_required_destructured_parameter_node_t *node, const pm_token_t *closing) {
4184
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4185
- node->base.location.end = closing->end;
4186
- }
4187
-
4188
4127
  // Allocate a new RequiredParameterNode node.
4189
4128
  static pm_required_parameter_node_t *
4190
4129
  pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -5668,6 +5607,8 @@ context_terminator(pm_context_t context, pm_token_t *token) {
5668
5607
  case PM_CONTEXT_FOR:
5669
5608
  case PM_CONTEXT_ENSURE:
5670
5609
  return token->type == PM_TOKEN_KEYWORD_END;
5610
+ case PM_CONTEXT_FOR_INDEX:
5611
+ return token->type == PM_TOKEN_KEYWORD_IN;
5671
5612
  case PM_CONTEXT_CASE_WHEN:
5672
5613
  return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
5673
5614
  case PM_CONTEXT_CASE_IN:
@@ -6103,16 +6044,21 @@ static pm_token_type_t
6103
6044
  lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6104
6045
  // Lex as far as we can into the current identifier.
6105
6046
  size_t width;
6106
- while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
6107
- parser->current.end += width;
6047
+ const uint8_t *end = parser->end;
6048
+ const uint8_t *current_start = parser->current.start;
6049
+ const uint8_t *current_end = parser->current.end;
6050
+
6051
+ while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6052
+ current_end += width;
6108
6053
  }
6054
+ parser->current.end = current_end;
6109
6055
 
6110
6056
  // Now cache the length of the identifier so that we can quickly compare it
6111
6057
  // against known keywords.
6112
- width = (size_t) (parser->current.end - parser->current.start);
6058
+ width = (size_t) (current_end - current_start);
6113
6059
 
6114
- if (parser->current.end < parser->end) {
6115
- if (((parser->current.end + 1 >= parser->end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
6060
+ if (current_end < end) {
6061
+ if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
6116
6062
  // First we'll attempt to extend the identifier by a ! or ?. Then we'll
6117
6063
  // check if we're returning the defined? keyword or just an identifier.
6118
6064
  width++;
@@ -6222,7 +6168,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6222
6168
  }
6223
6169
  }
6224
6170
 
6225
- return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6171
+ if (parser->encoding_changed) {
6172
+ return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6173
+ }
6174
+ return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6226
6175
  }
6227
6176
 
6228
6177
  // Returns true if the current token that the parser is considering is at the
@@ -6423,24 +6372,24 @@ escape_byte(uint8_t value, const uint8_t flags) {
6423
6372
  static inline void
6424
6373
  escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
6425
6374
  if (value <= 0x7F) { // 0xxxxxxx
6426
- pm_buffer_append_u8(buffer, (uint8_t) value);
6375
+ pm_buffer_append_byte(buffer, (uint8_t) value);
6427
6376
  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
6428
- pm_buffer_append_u8(buffer, (uint8_t) (0xC0 | (value >> 6)));
6429
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6377
+ pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
6378
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6430
6379
  } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
6431
- pm_buffer_append_u8(buffer, (uint8_t) (0xE0 | (value >> 12)));
6432
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6433
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6380
+ pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
6381
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6382
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6434
6383
  } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
6435
- pm_buffer_append_u8(buffer, (uint8_t) (0xF0 | (value >> 18)));
6436
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
6437
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6438
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6384
+ pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
6385
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
6386
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6387
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6439
6388
  } else {
6440
6389
  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
6441
- pm_buffer_append_u8(buffer, 0xEF);
6442
- pm_buffer_append_u8(buffer, 0xBF);
6443
- pm_buffer_append_u8(buffer, 0xBD);
6390
+ pm_buffer_append_byte(buffer, 0xEF);
6391
+ pm_buffer_append_byte(buffer, 0xBF);
6392
+ pm_buffer_append_byte(buffer, 0xBD);
6444
6393
  }
6445
6394
  }
6446
6395
 
@@ -6466,18 +6415,18 @@ escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
6466
6415
  uint8_t byte2 = (uint8_t) (byte & 0xF);
6467
6416
 
6468
6417
  if (byte1 >= 0xA) {
6469
- pm_buffer_append_u8(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
6418
+ pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
6470
6419
  } else {
6471
- pm_buffer_append_u8(buffer, (uint8_t) (byte1 + '0'));
6420
+ pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
6472
6421
  }
6473
6422
 
6474
6423
  if (byte2 >= 0xA) {
6475
- pm_buffer_append_u8(buffer, (uint8_t) (byte2 - 0xA + 'A'));
6424
+ pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
6476
6425
  } else {
6477
- pm_buffer_append_u8(buffer, (uint8_t) (byte2 + '0'));
6426
+ pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
6478
6427
  }
6479
6428
  } else {
6480
- pm_buffer_append_u8(buffer, byte);
6429
+ pm_buffer_append_byte(buffer, byte);
6481
6430
  }
6482
6431
  }
6483
6432
 
@@ -6487,57 +6436,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6487
6436
  switch (peek(parser)) {
6488
6437
  case '\\': {
6489
6438
  parser->current.end++;
6490
- pm_buffer_append_u8(buffer, '\\');
6439
+ pm_buffer_append_byte(buffer, '\\');
6491
6440
  return;
6492
6441
  }
6493
6442
  case '\'': {
6494
6443
  parser->current.end++;
6495
- pm_buffer_append_u8(buffer, '\'');
6444
+ pm_buffer_append_byte(buffer, '\'');
6496
6445
  return;
6497
6446
  }
6498
6447
  case 'a': {
6499
6448
  parser->current.end++;
6500
- pm_buffer_append_u8(buffer, '\a');
6449
+ pm_buffer_append_byte(buffer, '\a');
6501
6450
  return;
6502
6451
  }
6503
6452
  case 'b': {
6504
6453
  parser->current.end++;
6505
- pm_buffer_append_u8(buffer, '\b');
6454
+ pm_buffer_append_byte(buffer, '\b');
6506
6455
  return;
6507
6456
  }
6508
6457
  case 'e': {
6509
6458
  parser->current.end++;
6510
- pm_buffer_append_u8(buffer, '\033');
6459
+ pm_buffer_append_byte(buffer, '\033');
6511
6460
  return;
6512
6461
  }
6513
6462
  case 'f': {
6514
6463
  parser->current.end++;
6515
- pm_buffer_append_u8(buffer, '\f');
6464
+ pm_buffer_append_byte(buffer, '\f');
6516
6465
  return;
6517
6466
  }
6518
6467
  case 'n': {
6519
6468
  parser->current.end++;
6520
- pm_buffer_append_u8(buffer, '\n');
6469
+ pm_buffer_append_byte(buffer, '\n');
6521
6470
  return;
6522
6471
  }
6523
6472
  case 'r': {
6524
6473
  parser->current.end++;
6525
- pm_buffer_append_u8(buffer, '\r');
6474
+ pm_buffer_append_byte(buffer, '\r');
6526
6475
  return;
6527
6476
  }
6528
6477
  case 's': {
6529
6478
  parser->current.end++;
6530
- pm_buffer_append_u8(buffer, ' ');
6479
+ pm_buffer_append_byte(buffer, ' ');
6531
6480
  return;
6532
6481
  }
6533
6482
  case 't': {
6534
6483
  parser->current.end++;
6535
- pm_buffer_append_u8(buffer, '\t');
6484
+ pm_buffer_append_byte(buffer, '\t');
6536
6485
  return;
6537
6486
  }
6538
6487
  case 'v': {
6539
6488
  parser->current.end++;
6540
- pm_buffer_append_u8(buffer, '\v');
6489
+ pm_buffer_append_byte(buffer, '\v');
6541
6490
  return;
6542
6491
  }
6543
6492
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -6554,7 +6503,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6554
6503
  }
6555
6504
  }
6556
6505
 
6557
- pm_buffer_append_u8(buffer, value);
6506
+ pm_buffer_append_byte(buffer, value);
6558
6507
  return;
6559
6508
  }
6560
6509
  case 'x': {
@@ -6576,7 +6525,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6576
6525
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
6577
6526
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
6578
6527
  } else {
6579
- pm_buffer_append_u8(buffer, value);
6528
+ pm_buffer_append_byte(buffer, value);
6580
6529
  }
6581
6530
  } else {
6582
6531
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
@@ -6770,14 +6719,14 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6770
6719
  case '\r': {
6771
6720
  if (peek_offset(parser, 1) == '\n') {
6772
6721
  parser->current.end += 2;
6773
- pm_buffer_append_u8(buffer, '\n');
6722
+ pm_buffer_append_byte(buffer, '\n');
6774
6723
  return;
6775
6724
  }
6776
6725
  }
6777
6726
  /* fallthrough */
6778
6727
  default: {
6779
6728
  if (parser->current.end < parser->end) {
6780
- pm_buffer_append_u8(buffer, *parser->current.end++);
6729
+ pm_buffer_append_byte(buffer, *parser->current.end++);
6781
6730
  }
6782
6731
  return;
6783
6732
  }
@@ -7031,7 +6980,7 @@ typedef struct {
7031
6980
  // Push the given byte into the token buffer.
7032
6981
  static inline void
7033
6982
  pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
7034
- pm_buffer_append_u8(&token_buffer->buffer, byte);
6983
+ pm_buffer_append_byte(&token_buffer->buffer, byte);
7035
6984
  }
7036
6985
 
7037
6986
  // When we're about to return from lexing the current token and we know for sure
@@ -8328,7 +8277,7 @@ parser_lex(pm_parser_t *parser) {
8328
8277
 
8329
8278
  // If we haven't found an escape yet, then this buffer will be
8330
8279
  // unallocated since we can refer directly to the source string.
8331
- pm_token_buffer_t token_buffer = { 0 };
8280
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8332
8281
 
8333
8282
  while (breakpoint != NULL) {
8334
8283
  // If we hit a null byte, skip directly past it.
@@ -8504,7 +8453,7 @@ parser_lex(pm_parser_t *parser) {
8504
8453
  // characters.
8505
8454
  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
8506
8455
  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
8507
- pm_token_buffer_t token_buffer = { 0 };
8456
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8508
8457
 
8509
8458
  while (breakpoint != NULL) {
8510
8459
  // If we hit a null byte, skip directly past it.
@@ -8693,7 +8642,7 @@ parser_lex(pm_parser_t *parser) {
8693
8642
 
8694
8643
  // If we haven't found an escape yet, then this buffer will be
8695
8644
  // unallocated since we can refer directly to the source string.
8696
- pm_token_buffer_t token_buffer = { 0 };
8645
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8697
8646
 
8698
8647
  while (breakpoint != NULL) {
8699
8648
  // If we hit the incrementor, then we'll increment then nesting and
@@ -8954,7 +8903,7 @@ parser_lex(pm_parser_t *parser) {
8954
8903
  }
8955
8904
 
8956
8905
  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
8957
- pm_token_buffer_t token_buffer = { 0 };
8906
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8958
8907
  bool was_escaped_newline = false;
8959
8908
 
8960
8909
  while (breakpoint != NULL) {
@@ -9572,10 +9521,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
9572
9521
  splat->expression = parse_target(parser, splat->expression);
9573
9522
  }
9574
9523
 
9575
- pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
9576
- pm_multi_target_node_targets_append(multi_target, (pm_node_t *) splat);
9577
-
9578
- return (pm_node_t *) multi_target;
9524
+ return (pm_node_t *) splat;
9579
9525
  }
9580
9526
  case PM_CALL_NODE: {
9581
9527
  pm_call_node_t *call = (pm_call_node_t *) target;
@@ -9651,7 +9597,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
9651
9597
  }
9652
9598
  }
9653
9599
 
9654
- // Parse a write targets and validate that it is in a valid position for
9600
+ // Parse a write target and validate that it is in a valid position for
9655
9601
  // assignment.
9656
9602
  static pm_node_t *
9657
9603
  parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
@@ -9722,7 +9668,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
9722
9668
  }
9723
9669
 
9724
9670
  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
9725
- pm_multi_target_node_targets_append(multi_target, (pm_node_t *) splat);
9671
+ pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
9726
9672
 
9727
9673
  return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
9728
9674
  }
@@ -9838,7 +9784,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
9838
9784
  bool has_splat = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
9839
9785
 
9840
9786
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
9841
- pm_multi_target_node_targets_append(result, parse_target(parser, first_target));
9787
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
9842
9788
 
9843
9789
  while (accept1(parser, PM_TOKEN_COMMA)) {
9844
9790
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -9858,19 +9804,19 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
9858
9804
  }
9859
9805
 
9860
9806
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
9861
- pm_multi_target_node_targets_append(result, splat);
9807
+ pm_multi_target_node_targets_append(parser, result, splat);
9862
9808
  has_splat = true;
9863
9809
  } else if (token_begins_expression_p(parser->current.type)) {
9864
9810
  pm_node_t *target = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
9865
9811
  target = parse_target(parser, target);
9866
9812
 
9867
- pm_multi_target_node_targets_append(result, target);
9868
- } else {
9813
+ pm_multi_target_node_targets_append(parser, result, target);
9814
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
9869
9815
  // If we get here, then we have a trailing , in a multi target node.
9870
9816
  // We need to indicate this somehow in the tree, so we'll add an
9871
9817
  // anonymous splat.
9872
9818
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
9873
- pm_multi_target_node_targets_append(result, splat);
9819
+ pm_multi_target_node_targets_append(parser, result, splat);
9874
9820
  break;
9875
9821
  }
9876
9822
  }
@@ -9963,9 +9909,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
9963
9909
  }
9964
9910
 
9965
9911
  // Parse all of the elements of a hash.
9966
- static void
9912
+ // Returns true if a double splat was found
9913
+ static bool
9967
9914
  parse_assocs(pm_parser_t *parser, pm_node_t *node) {
9968
9915
  assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
9916
+ bool contains_keyword_splat = false;
9969
9917
 
9970
9918
  while (true) {
9971
9919
  pm_node_t *element;
@@ -9983,6 +9931,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
9983
9931
  }
9984
9932
 
9985
9933
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
9934
+ contains_keyword_splat = true;
9986
9935
  break;
9987
9936
  }
9988
9937
  case PM_TOKEN_LABEL: {
@@ -10041,7 +9990,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10041
9990
  }
10042
9991
 
10043
9992
  // If there's no comma after the element, then we're done.
10044
- if (!accept1(parser, PM_TOKEN_COMMA)) return;
9993
+ if (!accept1(parser, PM_TOKEN_COMMA)) break;
10045
9994
 
10046
9995
  // If the next element starts with a label or a **, then we know we have
10047
9996
  // another element in the hash, so we'll continue parsing.
@@ -10052,8 +10001,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10052
10001
  if (token_begins_expression_p(parser->current.type)) continue;
10053
10002
 
10054
10003
  // Otherwise by default we will exit out of this loop.
10055
- return;
10004
+ break;
10056
10005
  }
10006
+ return contains_keyword_splat;
10057
10007
  }
10058
10008
 
10059
10009
  // Append an argument to a list of arguments.
@@ -10101,12 +10051,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10101
10051
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
10102
10052
  argument = (pm_node_t *) hash;
10103
10053
 
10054
+ bool contains_keyword_splat = false;
10104
10055
  if (!match7(parser, terminator, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
10105
- parse_assocs(parser, (pm_node_t *) hash);
10056
+ contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
10106
10057
  }
10107
10058
 
10108
10059
  parsed_bare_hash = true;
10109
10060
  parse_arguments_append(parser, arguments, argument);
10061
+ if (contains_keyword_splat) {
10062
+ arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
10063
+ }
10110
10064
  break;
10111
10065
  }
10112
10066
  case PM_TOKEN_UAMPERSAND: {
@@ -10180,6 +10134,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10180
10134
  argument = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10181
10135
  }
10182
10136
 
10137
+ bool contains_keyword_splat = false;
10183
10138
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
10184
10139
  if (parsed_bare_hash) {
10185
10140
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -10206,13 +10161,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10206
10161
  token_begins_expression_p(parser->current.type) ||
10207
10162
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
10208
10163
  )) {
10209
- parse_assocs(parser, (pm_node_t *) bare_hash);
10164
+ contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
10210
10165
  }
10211
10166
 
10212
10167
  parsed_bare_hash = true;
10213
10168
  }
10214
10169
 
10215
10170
  parse_arguments_append(parser, arguments, argument);
10171
+ if (contains_keyword_splat) {
10172
+ arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
10173
+ }
10216
10174
  break;
10217
10175
  }
10218
10176
  }
@@ -10248,34 +10206,27 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10248
10206
  // end
10249
10207
  //
10250
10208
  // It can recurse infinitely down, and splats are allowed to group arguments.
10251
- static pm_required_destructured_parameter_node_t *
10209
+ static pm_multi_target_node_t *
10252
10210
  parse_required_destructured_parameter(pm_parser_t *parser) {
10253
10211
  expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
10254
10212
 
10255
- pm_token_t opening = parser->previous;
10256
- pm_required_destructured_parameter_node_t *node = pm_required_destructured_parameter_node_create(parser, &opening);
10257
- bool parsed_splat = false;
10213
+ pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
10214
+ pm_multi_target_node_opening_set(node, &parser->previous);
10258
10215
 
10259
10216
  do {
10260
10217
  pm_node_t *param;
10261
10218
 
10262
- if (node->parameters.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
10263
- if (parsed_splat) {
10264
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
10265
- }
10266
-
10219
+ // If we get here then we have a trailing comma. In this case we'll
10220
+ // create an implicit splat node.
10221
+ if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
10267
10222
  param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
10268
- pm_required_destructured_parameter_node_append_parameter(node, param);
10223
+ pm_multi_target_node_targets_append(parser, node, param);
10269
10224
  break;
10270
10225
  }
10271
10226
 
10272
10227
  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
10273
10228
  param = (pm_node_t *) parse_required_destructured_parameter(parser);
10274
10229
  } else if (accept1(parser, PM_TOKEN_USTAR)) {
10275
- if (parsed_splat) {
10276
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
10277
- }
10278
-
10279
10230
  pm_token_t star = parser->previous;
10280
10231
  pm_node_t *value = NULL;
10281
10232
 
@@ -10287,7 +10238,6 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
10287
10238
  }
10288
10239
 
10289
10240
  param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
10290
- parsed_splat = true;
10291
10241
  } else {
10292
10242
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
10293
10243
  pm_token_t name = parser->previous;
@@ -10297,11 +10247,11 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
10297
10247
  pm_parser_local_add_token(parser, &name);
10298
10248
  }
10299
10249
 
10300
- pm_required_destructured_parameter_node_append_parameter(node, param);
10250
+ pm_multi_target_node_targets_append(parser, node, param);
10301
10251
  } while (accept1(parser, PM_TOKEN_COMMA));
10302
10252
 
10303
10253
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
10304
- pm_required_destructured_parameter_node_closing_set(node, &parser->previous);
10254
+ pm_multi_target_node_closing_set(node, &parser->previous);
10305
10255
 
10306
10256
  return node;
10307
10257
  }
@@ -11787,28 +11737,40 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
11787
11737
  // Parse a hash pattern.
11788
11738
  static pm_hash_pattern_node_t *
11789
11739
  parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
11790
- if (PM_NODE_TYPE_P(first_assoc, PM_ASSOC_NODE)) {
11791
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11792
- // Here we have a value for the first assoc in the list, so we will parse it
11793
- // now and update the first assoc.
11794
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
11795
-
11796
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
11797
- assoc->base.location.end = value->location.end;
11798
- assoc->value = value;
11799
- } else {
11800
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
11740
+ pm_node_list_t assocs = PM_EMPTY_NODE_LIST;
11741
+ pm_node_t *rest = NULL;
11801
11742
 
11802
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
11803
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
11804
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
11743
+ switch (PM_NODE_TYPE(first_assoc)) {
11744
+ case PM_ASSOC_NODE: {
11745
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11746
+ // Here we have a value for the first assoc in the list, so we will
11747
+ // parse it now and update the first assoc.
11748
+ pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
11749
+
11750
+ pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
11751
+ assoc->base.location.end = value->location.end;
11752
+ assoc->value = value;
11753
+ } else {
11754
+ pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
11755
+
11756
+ if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
11757
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
11758
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
11759
+ }
11805
11760
  }
11761
+
11762
+ pm_node_list_append(&assocs, first_assoc);
11763
+ break;
11806
11764
  }
11765
+ case PM_ASSOC_SPLAT_NODE:
11766
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
11767
+ rest = first_assoc;
11768
+ break;
11769
+ default:
11770
+ assert(false);
11771
+ break;
11807
11772
  }
11808
11773
 
11809
- pm_node_list_t assocs = PM_EMPTY_NODE_LIST;
11810
- pm_node_list_append(&assocs, first_assoc);
11811
-
11812
11774
  // If there are any other assocs, then we'll parse them now.
11813
11775
  while (accept1(parser, PM_TOKEN_COMMA)) {
11814
11776
  // Here we need to break to support trailing commas.
@@ -11839,7 +11801,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
11839
11801
  pm_node_list_append(&assocs, assoc);
11840
11802
  }
11841
11803
 
11842
- pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs);
11804
+ pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
11843
11805
  free(assocs.nodes);
11844
11806
 
11845
11807
  return node;
@@ -11924,32 +11886,45 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
11924
11886
  // pattern node.
11925
11887
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
11926
11888
  } else {
11927
- pm_node_t *key;
11889
+ pm_node_t *first_assoc;
11928
11890
 
11929
11891
  switch (parser->current.type) {
11930
- case PM_TOKEN_LABEL:
11892
+ case PM_TOKEN_LABEL: {
11931
11893
  parser_lex(parser);
11932
- key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
11894
+
11895
+ pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
11896
+ pm_token_t operator = not_provided(parser);
11897
+
11898
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
11933
11899
  break;
11900
+ }
11934
11901
  case PM_TOKEN_USTAR_STAR:
11935
- key = parse_pattern_keyword_rest(parser);
11902
+ first_assoc = parse_pattern_keyword_rest(parser);
11936
11903
  break;
11937
- case PM_TOKEN_STRING_BEGIN:
11938
- key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
11904
+ case PM_TOKEN_STRING_BEGIN: {
11905
+ pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
11906
+ pm_token_t operator = not_provided(parser);
11907
+
11939
11908
  if (!pm_symbol_node_label_p(key)) {
11940
11909
  pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
11941
11910
  }
11942
11911
 
11912
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
11943
11913
  break;
11944
- default:
11914
+ }
11915
+ default: {
11945
11916
  parser_lex(parser);
11946
11917
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
11947
- key = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
11918
+
11919
+ pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
11920
+ pm_token_t operator = not_provided(parser);
11921
+
11922
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
11948
11923
  break;
11924
+ }
11949
11925
  }
11950
11926
 
11951
- pm_token_t operator = not_provided(parser);
11952
- node = parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
11927
+ node = parse_pattern_hash(parser, first_assoc);
11953
11928
 
11954
11929
  accept1(parser, PM_TOKEN_NEWLINE);
11955
11930
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -12577,16 +12552,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
12577
12552
  parser_lex(parser);
12578
12553
  pm_accepts_block_stack_pop(parser);
12579
12554
 
12580
- // If we have a single statement and are ending on a right
12581
- // parenthesis, then we need to check if this is possibly a
12582
- // multiple target node.
12583
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
12555
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
12556
+ // If we have a single statement and are ending on a right
12557
+ // parenthesis, then we need to check if this is possibly a
12558
+ // multiple target node.
12584
12559
  pm_multi_target_node_t *multi_target;
12585
- if (((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
12560
+
12561
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
12586
12562
  multi_target = (pm_multi_target_node_t *) statement;
12587
12563
  } else {
12588
12564
  multi_target = pm_multi_target_node_create(parser);
12589
- pm_multi_target_node_targets_append(multi_target, statement);
12565
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
12590
12566
  }
12591
12567
 
12592
12568
  pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
@@ -12598,10 +12574,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
12598
12574
  multi_target->base.location.end = rparen_loc.end;
12599
12575
 
12600
12576
  if (match1(parser, PM_TOKEN_COMMA)) {
12601
- return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
12602
- } else {
12603
- return parse_target_validate(parser, (pm_node_t *) multi_target);
12577
+ if (binding_power == PM_BINDING_POWER_STATEMENT) {
12578
+ return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
12579
+ }
12580
+ return (pm_node_t *) multi_target;
12604
12581
  }
12582
+
12583
+ return parse_target_validate(parser, (pm_node_t *) multi_target);
12605
12584
  }
12606
12585
 
12607
12586
  // If we have a single statement and are ending on a right parenthesis
@@ -13688,7 +13667,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13688
13667
  parser_lex(parser);
13689
13668
  pm_token_t for_keyword = parser->previous;
13690
13669
  pm_node_t *index;
13670
+
13691
13671
  pm_parser_scope_push_transparent(parser);
13672
+ context_push(parser, PM_CONTEXT_FOR_INDEX);
13692
13673
 
13693
13674
  // First, parse out the first index expression.
13694
13675
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13714,6 +13695,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13714
13695
  index = parse_target(parser, index);
13715
13696
  }
13716
13697
 
13698
+ context_pop(parser);
13717
13699
  pm_parser_scope_pop(parser);
13718
13700
  pm_do_loop_stack_push(parser, true);
13719
13701
 
@@ -14596,6 +14578,50 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
14596
14578
  }
14597
14579
  }
14598
14580
 
14581
+ // Potentially change a =~ with a regular expression with named captures into a
14582
+ // match write node.
14583
+ static pm_node_t *
14584
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
14585
+ pm_string_list_t named_captures;
14586
+ pm_string_list_init(&named_captures);
14587
+
14588
+ pm_node_t *result;
14589
+ if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
14590
+ pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
14591
+
14592
+ for (size_t index = 0; index < named_captures.length; index++) {
14593
+ pm_string_t *name = &named_captures.strings[index];
14594
+ pm_constant_id_t local;
14595
+
14596
+ if (content->type == PM_STRING_SHARED) {
14597
+ // If the unescaped string is a slice of the source,
14598
+ // then we can copy the names directly. The pointers
14599
+ // will line up.
14600
+ local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
14601
+ } else {
14602
+ // Otherwise, the name is a slice of the malloc-ed
14603
+ // owned string, in which case we need to copy it
14604
+ // out into a new string.
14605
+ size_t length = pm_string_length(name);
14606
+
14607
+ void *memory = malloc(length);
14608
+ memcpy(memory, pm_string_source(name), length);
14609
+
14610
+ local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
14611
+ }
14612
+
14613
+ pm_constant_id_list_append(&match->locals, local);
14614
+ }
14615
+
14616
+ result = (pm_node_t *) match;
14617
+ } else {
14618
+ result = (pm_node_t *) call;
14619
+ }
14620
+
14621
+ pm_string_list_free(&named_captures);
14622
+ return result;
14623
+ }
14624
+
14599
14625
  static inline pm_node_t *
14600
14626
  parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
14601
14627
  pm_token_t token = parser->current;
@@ -14620,18 +14646,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
14620
14646
  return parse_write(parser, node, &token, value);
14621
14647
  }
14622
14648
  case PM_SPLAT_NODE: {
14623
- pm_splat_node_t *splat_node = (pm_splat_node_t *) node;
14649
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
14650
+ pm_multi_target_node_targets_append(parser, multi_target, node);
14624
14651
 
14625
- switch (PM_NODE_TYPE(splat_node->expression)) {
14626
- case PM_CASE_WRITABLE:
14627
- parser_lex(parser);
14628
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
14629
- return parse_write(parser, (pm_node_t *) splat_node, &token, value);
14630
- default:
14631
- break;
14632
- }
14652
+ parser_lex(parser);
14653
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
14654
+ return parse_write(parser, (pm_node_t *) multi_target, &token, value);
14633
14655
  }
14634
- /* fallthrough */
14635
14656
  default:
14636
14657
  parser_lex(parser);
14637
14658
 
@@ -15026,42 +15047,51 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15026
15047
  // If the receiver of this =~ is a regular expression node, then we
15027
15048
  // need to introduce local variables for it based on its named
15028
15049
  // capture groups.
15029
- if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
15030
- pm_string_list_t named_captures;
15031
- pm_string_list_init(&named_captures);
15032
-
15033
- const pm_string_t *unescaped = &((pm_regular_expression_node_t *) node)->unescaped;
15034
- if (pm_regexp_named_capture_group_names(pm_string_source(unescaped), pm_string_length(unescaped), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
15035
- pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
15036
-
15037
- for (size_t index = 0; index < named_captures.length; index++) {
15038
- pm_string_t *name = &named_captures.strings[index];
15039
- pm_constant_id_t local;
15040
-
15041
- if (unescaped->type == PM_STRING_SHARED) {
15042
- // If the unescaped string is a slice of the source,
15043
- // then we can copy the names directly. The pointers
15044
- // will line up.
15045
- local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
15046
- } else {
15047
- // Otherwise, the name is a slice of the malloc-ed
15048
- // owned string, in which case we need to copy it
15049
- // out into a new string.
15050
- size_t length = pm_string_length(name);
15050
+ if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
15051
+ // It's possible to have an interpolated regular expression node
15052
+ // that only contains strings. This is because it can be split
15053
+ // up by a heredoc. In this case we need to concat the unescaped
15054
+ // strings together and then parse them as a regular expression.
15055
+ pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
15051
15056
 
15052
- void *memory = malloc(length);
15053
- memcpy(memory, pm_string_source(name), length);
15057
+ bool interpolated = false;
15058
+ size_t total_length = 0;
15054
15059
 
15055
- local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
15056
- }
15060
+ for (size_t index = 0; index < parts->size; index++) {
15061
+ pm_node_t *part = parts->nodes[index];
15057
15062
 
15058
- pm_constant_id_list_append(&match->locals, local);
15063
+ if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
15064
+ total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
15065
+ } else {
15066
+ interpolated = true;
15067
+ break;
15059
15068
  }
15060
-
15061
- result = (pm_node_t *) match;
15062
15069
  }
15063
15070
 
15064
- pm_string_list_free(&named_captures);
15071
+ if (!interpolated) {
15072
+ void *memory = malloc(total_length);
15073
+ if (!memory) abort();
15074
+
15075
+ uint8_t *cursor = memory;
15076
+ for (size_t index = 0; index < parts->size; index++) {
15077
+ pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
15078
+ size_t length = pm_string_length(unescaped);
15079
+
15080
+ memcpy(cursor, pm_string_source(unescaped), length);
15081
+ cursor += length;
15082
+ }
15083
+
15084
+ pm_string_t owned;
15085
+ pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
15086
+
15087
+ result = parse_regular_expression_named_captures(parser, &owned, call);
15088
+ pm_string_free(&owned);
15089
+ }
15090
+ } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
15091
+ // If we have a regular expression node, then we can just parse
15092
+ // the named captures directly off the unescaped string.
15093
+ const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
15094
+ result = parse_regular_expression_named_captures(parser, content, call);
15065
15095
  }
15066
15096
 
15067
15097
  return result;
@@ -15667,16 +15697,20 @@ pm_parse(pm_parser_t *parser) {
15667
15697
  return parse_program(parser);
15668
15698
  }
15669
15699
 
15700
+ static inline void
15701
+ pm_serialize_header(pm_buffer_t *buffer) {
15702
+ pm_buffer_append_string(buffer, "PRISM", 5);
15703
+ pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
15704
+ pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
15705
+ pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
15706
+ pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
15707
+ }
15708
+
15670
15709
  PRISM_EXPORTED_FUNCTION void
15671
15710
  pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
15672
- pm_buffer_append_str(buffer, "PRISM", 5);
15673
- pm_buffer_append_u8(buffer, PRISM_VERSION_MAJOR);
15674
- pm_buffer_append_u8(buffer, PRISM_VERSION_MINOR);
15675
- pm_buffer_append_u8(buffer, PRISM_VERSION_PATCH);
15676
- pm_buffer_append_u8(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
15677
-
15711
+ pm_serialize_header(buffer);
15678
15712
  pm_serialize_content(parser, node, buffer);
15679
- pm_buffer_append_str(buffer, "\0", 1);
15713
+ pm_buffer_append_string(buffer, "\0", 1);
15680
15714
  }
15681
15715
 
15682
15716
  // Parse and serialize the AST represented by the given source to the given
@@ -15688,7 +15722,26 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
15688
15722
  if (metadata) pm_parser_metadata(&parser, metadata);
15689
15723
 
15690
15724
  pm_node_t *node = pm_parse(&parser);
15691
- pm_serialize(&parser, node, buffer);
15725
+
15726
+ pm_serialize_header(buffer);
15727
+ pm_serialize_content(&parser, node, buffer);
15728
+ pm_buffer_append_byte(buffer, '\0');
15729
+
15730
+ pm_node_destroy(&parser, node);
15731
+ pm_parser_free(&parser);
15732
+ }
15733
+
15734
+ // Parse and serialize the comments in the given source to the given buffer.
15735
+ PRISM_EXPORTED_FUNCTION void
15736
+ pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
15737
+ pm_parser_t parser;
15738
+ pm_parser_init(&parser, source, size, NULL);
15739
+ if (metadata) pm_parser_metadata(&parser, metadata);
15740
+
15741
+ pm_node_t *node = pm_parse(&parser);
15742
+ pm_serialize_header(buffer);
15743
+ pm_serialize_encoding(&parser.encoding, buffer);
15744
+ pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
15692
15745
 
15693
15746
  pm_node_destroy(&parser, node);
15694
15747
  pm_parser_free(&parser);