prism 0.15.1 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -1
  3. data/Makefile +6 -0
  4. data/README.md +2 -0
  5. data/config.yml +21 -20
  6. data/docs/configuration.md +2 -0
  7. data/docs/javascript.md +90 -0
  8. data/docs/releasing.md +27 -0
  9. data/docs/ruby_api.md +2 -0
  10. data/ext/prism/api_node.c +66 -68
  11. data/ext/prism/extension.c +73 -0
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +40 -40
  14. data/include/prism/defines.h +9 -0
  15. data/include/prism/enc/pm_encoding.h +1 -0
  16. data/include/prism/node.h +0 -17
  17. data/include/prism/parser.h +1 -0
  18. data/include/prism/prettyprint.h +15 -0
  19. data/include/prism/util/pm_buffer.h +10 -4
  20. data/include/prism/util/pm_constant_pool.h +1 -1
  21. data/include/prism/util/pm_newline_list.h +1 -1
  22. data/include/prism/version.h +3 -3
  23. data/include/prism.h +11 -11
  24. data/lib/prism/compiler.rb +0 -3
  25. data/lib/prism/debug.rb +20 -6
  26. data/lib/prism/desugar_compiler.rb +1 -1
  27. data/lib/prism/dispatcher.rb +0 -14
  28. data/lib/prism/dsl.rb +8 -13
  29. data/lib/prism/ffi.rb +25 -0
  30. data/lib/prism/lex_compat.rb +1 -1
  31. data/lib/prism/mutation_compiler.rb +3 -8
  32. data/lib/prism/node.rb +123 -159
  33. data/lib/prism/node_ext.rb +23 -16
  34. data/lib/prism/parse_result.rb +21 -5
  35. data/lib/prism/pattern.rb +3 -3
  36. data/lib/prism/serialize.rb +901 -305
  37. data/lib/prism/visitor.rb +0 -3
  38. data/prism.gemspec +8 -1
  39. data/rbi/prism.rbi +7261 -0
  40. data/rbi/prism_static.rbi +182 -0
  41. data/sig/prism.rbs +4439 -0
  42. data/sig/prism_static.rbs +110 -0
  43. data/src/enc/pm_unicode.c +1 -1
  44. data/src/node.c +28 -29
  45. data/src/prettyprint.c +7674 -1647
  46. data/src/prism.c +353 -300
  47. data/src/regexp.c +2 -0
  48. data/src/serialize.c +392 -381
  49. data/src/util/pm_buffer.c +47 -12
  50. data/src/util/pm_constant_pool.c +1 -1
  51. data/src/util/pm_newline_list.c +8 -54
  52. metadata +9 -2
data/src/prism.c CHANGED
@@ -40,6 +40,7 @@ debug_context(pm_context_t context) {
40
40
  case PM_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
41
41
  case PM_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
42
42
  case PM_CONTEXT_FOR: return "FOR";
43
+ case PM_CONTEXT_FOR_INDEX: return "FOR_INDEX";
43
44
  case PM_CONTEXT_IF: return "IF";
44
45
  case PM_CONTEXT_MAIN: return "MAIN";
45
46
  case PM_CONTEXT_MODULE: return "MODULE";
@@ -80,14 +81,12 @@ debug_contexts(pm_parser_t *parser) {
80
81
  }
81
82
 
82
83
  PRISM_ATTRIBUTE_UNUSED static void
83
- debug_node(const char *message, pm_parser_t *parser, pm_node_t *node) {
84
- pm_buffer_t buffer;
85
- if (!pm_buffer_init(&buffer)) return;
86
-
87
- pm_prettyprint(parser, node, &buffer);
84
+ debug_node(const pm_parser_t *parser, const pm_node_t *node) {
85
+ pm_buffer_t output_buffer = { 0 };
86
+ pm_prettyprint(&output_buffer, parser, node);
88
87
 
89
- fprintf(stderr, "%s\n%.*s\n", message, (int) buffer.length, buffer.value);
90
- pm_buffer_free(&buffer);
88
+ fprintf(stderr, "%.*s", (int) output_buffer.length, output_buffer.value);
89
+ pm_buffer_free(&output_buffer);
91
90
  }
92
91
 
93
92
  PRISM_ATTRIBUTE_UNUSED static void
@@ -648,87 +647,6 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
648
647
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
649
648
  }
650
649
 
651
- /******************************************************************************/
652
- /* Scope node functions */
653
- /******************************************************************************/
654
-
655
- // Generate a scope node from the given node.
656
- void
657
- pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous, pm_parser_t *parser) {
658
- scope->base.type = PM_SCOPE_NODE;
659
- scope->base.location.start = node->location.start;
660
- scope->base.location.end = node->location.end;
661
-
662
- scope->previous = previous;
663
- scope->parser = parser;
664
- scope->ast_node = (pm_node_t *)node;
665
- scope->parameters = NULL;
666
- scope->body = NULL;
667
- scope->constants = NULL;
668
- if (previous) {
669
- scope->constants = previous->constants;
670
- }
671
- scope->index_lookup_table = NULL;
672
-
673
- pm_constant_id_list_init(&scope->locals);
674
-
675
- switch (PM_NODE_TYPE(node)) {
676
- case PM_BLOCK_NODE: {
677
- pm_block_node_t *cast = (pm_block_node_t *) node;
678
- if (cast->parameters) scope->parameters = cast->parameters->parameters;
679
- scope->body = cast->body;
680
- scope->locals = cast->locals;
681
- break;
682
- }
683
- case PM_CLASS_NODE: {
684
- pm_class_node_t *cast = (pm_class_node_t *) node;
685
- scope->body = cast->body;
686
- scope->locals = cast->locals;
687
- break;
688
- }
689
- case PM_DEF_NODE: {
690
- pm_def_node_t *cast = (pm_def_node_t *) node;
691
- scope->parameters = cast->parameters;
692
- scope->body = cast->body;
693
- scope->locals = cast->locals;
694
- break;
695
- }
696
- case PM_FOR_NODE: {
697
- pm_for_node_t *cast = (pm_for_node_t *)node;
698
- scope->body = (pm_node_t *)cast->statements;
699
- break;
700
- }
701
- case PM_LAMBDA_NODE: {
702
- pm_lambda_node_t *cast = (pm_lambda_node_t *) node;
703
- if (cast->parameters) scope->parameters = cast->parameters->parameters;
704
- scope->body = cast->body;
705
- scope->locals = cast->locals;
706
- break;
707
- }
708
- case PM_MODULE_NODE: {
709
- pm_module_node_t *cast = (pm_module_node_t *) node;
710
- scope->body = cast->body;
711
- scope->locals = cast->locals;
712
- break;
713
- }
714
- case PM_PROGRAM_NODE: {
715
- pm_program_node_t *cast = (pm_program_node_t *) node;
716
- scope->body = (pm_node_t *) cast->statements;
717
- scope->locals = cast->locals;
718
- break;
719
- }
720
- case PM_SINGLETON_CLASS_NODE: {
721
- pm_singleton_class_node_t *cast = (pm_singleton_class_node_t *) node;
722
- scope->body = cast->body;
723
- scope->locals = cast->locals;
724
- break;
725
- }
726
- default:
727
- assert(false && "unreachable");
728
- break;
729
- }
730
- }
731
-
732
650
  /******************************************************************************/
733
651
  /* Node creation functions */
734
652
  /******************************************************************************/
@@ -765,11 +683,15 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
765
683
  return (uint32_t) value;
766
684
  }
767
685
 
686
+ // When you have an encoding flag on a regular expression, it takes precedence
687
+ // over all of the previously set encoding flags. So we need to mask off any
688
+ // previously set encoding flags before setting the new one.
689
+ #define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
690
+
768
691
  // Parse out the options for a regular expression.
769
692
  static inline pm_node_flags_t
770
693
  pm_regular_expression_flags_create(const pm_token_t *closing) {
771
694
  pm_node_flags_t flags = 0;
772
- pm_node_flags_t mask = (uint16_t) 0xFF0F;
773
695
 
774
696
  if (closing->type == PM_TOKEN_REGEXP_END) {
775
697
  for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
@@ -779,10 +701,10 @@ pm_regular_expression_flags_create(const pm_token_t *closing) {
779
701
  case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
780
702
  case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
781
703
 
782
- case 'e': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_EUC_JP; break;
783
- case 'n': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break;
784
- case 's': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break;
785
- case 'u': flags &= mask; flags |= PM_REGULAR_EXPRESSION_FLAGS_UTF_8; break;
704
+ case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
705
+ case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
706
+ case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
707
+ case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
786
708
 
787
709
  default: assert(false && "unreachable");
788
710
  }
@@ -792,6 +714,8 @@ pm_regular_expression_flags_create(const pm_token_t *closing) {
792
714
  return flags;
793
715
  }
794
716
 
717
+ #undef PM_REGULAR_EXPRESSION_ENCODING_MASK
718
+
795
719
  // Allocate and initialize a new StatementsNode node.
796
720
  static pm_statements_node_t *
797
721
  pm_statements_node_create(pm_parser_t *parser);
@@ -2664,10 +2588,10 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
2664
2588
  },
2665
2589
  },
2666
2590
  .constant = NULL,
2667
- .kwrest = NULL,
2668
2591
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2669
2592
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2670
- .assocs = PM_EMPTY_NODE_LIST
2593
+ .elements = PM_EMPTY_NODE_LIST,
2594
+ .rest = NULL
2671
2595
  };
2672
2596
 
2673
2597
  return node;
@@ -2675,27 +2599,44 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
2675
2599
 
2676
2600
  // Allocate and initialize a new hash pattern node.
2677
2601
  static pm_hash_pattern_node_t *
2678
- pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *assocs) {
2602
+ pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
2679
2603
  pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
2680
2604
 
2605
+ const uint8_t *start;
2606
+ const uint8_t *end;
2607
+
2608
+ if (elements->size > 0) {
2609
+ if (rest) {
2610
+ start = elements->nodes[0]->location.start;
2611
+ end = rest->location.end;
2612
+ } else {
2613
+ start = elements->nodes[0]->location.start;
2614
+ end = elements->nodes[elements->size - 1]->location.end;
2615
+ }
2616
+ } else {
2617
+ assert(rest != NULL);
2618
+ start = rest->location.start;
2619
+ end = rest->location.end;
2620
+ }
2621
+
2681
2622
  *node = (pm_hash_pattern_node_t) {
2682
2623
  {
2683
2624
  .type = PM_HASH_PATTERN_NODE,
2684
2625
  .location = {
2685
- .start = assocs->nodes[0]->location.start,
2686
- .end = assocs->nodes[assocs->size - 1]->location.end
2626
+ .start = start,
2627
+ .end = end
2687
2628
  },
2688
2629
  },
2689
2630
  .constant = NULL,
2690
- .kwrest = NULL,
2691
- .assocs = PM_EMPTY_NODE_LIST,
2631
+ .elements = PM_EMPTY_NODE_LIST,
2632
+ .rest = rest,
2692
2633
  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2693
2634
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2694
2635
  };
2695
2636
 
2696
- for (size_t index = 0; index < assocs->size; index++) {
2697
- pm_node_t *assoc = assocs->nodes[index];
2698
- pm_node_list_append(&node->assocs, assoc);
2637
+ for (size_t index = 0; index < elements->size; index++) {
2638
+ pm_node_t *element = elements->nodes[index];
2639
+ pm_node_list_append(&node->elements, element);
2699
2640
  }
2700
2641
 
2701
2642
  return node;
@@ -3692,7 +3633,9 @@ pm_multi_target_node_create(pm_parser_t *parser) {
3692
3633
  .type = PM_MULTI_TARGET_NODE,
3693
3634
  .location = { .start = NULL, .end = NULL }
3694
3635
  },
3695
- .targets = PM_EMPTY_NODE_LIST,
3636
+ .lefts = PM_EMPTY_NODE_LIST,
3637
+ .rest = NULL,
3638
+ .rights = PM_EMPTY_NODE_LIST,
3696
3639
  .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3697
3640
  .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3698
3641
  };
@@ -3702,8 +3645,19 @@ pm_multi_target_node_create(pm_parser_t *parser) {
3702
3645
 
3703
3646
  // Append a target to a MultiTargetNode node.
3704
3647
  static void
3705
- pm_multi_target_node_targets_append(pm_multi_target_node_t *node, pm_node_t *target) {
3706
- pm_node_list_append(&node->targets, target);
3648
+ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
3649
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
3650
+ if (node->rest == NULL) {
3651
+ node->rest = target;
3652
+ } else {
3653
+ pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
3654
+ pm_node_list_append(&node->rights, target);
3655
+ }
3656
+ } else if (node->rest == NULL) {
3657
+ pm_node_list_append(&node->lefts, target);
3658
+ } else {
3659
+ pm_node_list_append(&node->rights, target);
3660
+ }
3707
3661
 
3708
3662
  if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
3709
3663
  node->base.location.start = target->location.start;
@@ -3714,6 +3668,20 @@ pm_multi_target_node_targets_append(pm_multi_target_node_t *node, pm_node_t *tar
3714
3668
  }
3715
3669
  }
3716
3670
 
3671
+ // Set the opening of a MultiTargetNode node.
3672
+ static void
3673
+ pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
3674
+ node->base.location.start = lparen->start;
3675
+ node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
3676
+ }
3677
+
3678
+ // Set the closing of a MultiTargetNode node.
3679
+ static void
3680
+ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
3681
+ node->base.location.end = rparen->end;
3682
+ node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
3683
+ }
3684
+
3717
3685
  // Allocate a new MultiWriteNode node.
3718
3686
  static pm_multi_write_node_t *
3719
3687
  pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
@@ -3727,7 +3695,9 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
3727
3695
  .end = value->location.end
3728
3696
  }
3729
3697
  },
3730
- .targets = target->targets,
3698
+ .lefts = target->lefts,
3699
+ .rest = target->rest,
3700
+ .rights = target->rights,
3731
3701
  .lparen_loc = target->lparen_loc,
3732
3702
  .rparen_loc = target->rparen_loc,
3733
3703
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -4154,37 +4124,6 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
4154
4124
  return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
4155
4125
  }
4156
4126
 
4157
- // Allocate a new RequiredDestructuredParameterNode node.
4158
- static pm_required_destructured_parameter_node_t *
4159
- pm_required_destructured_parameter_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4160
- pm_required_destructured_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_required_destructured_parameter_node_t);
4161
-
4162
- *node = (pm_required_destructured_parameter_node_t) {
4163
- {
4164
- .type = PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE,
4165
- .location = PM_LOCATION_TOKEN_VALUE(opening)
4166
- },
4167
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4168
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4169
- .parameters = PM_EMPTY_NODE_LIST
4170
- };
4171
-
4172
- return node;
4173
- }
4174
-
4175
- // Append a new parameter to the given RequiredDestructuredParameterNode node.
4176
- static void
4177
- pm_required_destructured_parameter_node_append_parameter(pm_required_destructured_parameter_node_t *node, pm_node_t *parameter) {
4178
- pm_node_list_append(&node->parameters, parameter);
4179
- }
4180
-
4181
- // Set the closing token of the given RequiredDestructuredParameterNode node.
4182
- static void
4183
- pm_required_destructured_parameter_node_closing_set(pm_required_destructured_parameter_node_t *node, const pm_token_t *closing) {
4184
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4185
- node->base.location.end = closing->end;
4186
- }
4187
-
4188
4127
  // Allocate a new RequiredParameterNode node.
4189
4128
  static pm_required_parameter_node_t *
4190
4129
  pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -5668,6 +5607,8 @@ context_terminator(pm_context_t context, pm_token_t *token) {
5668
5607
  case PM_CONTEXT_FOR:
5669
5608
  case PM_CONTEXT_ENSURE:
5670
5609
  return token->type == PM_TOKEN_KEYWORD_END;
5610
+ case PM_CONTEXT_FOR_INDEX:
5611
+ return token->type == PM_TOKEN_KEYWORD_IN;
5671
5612
  case PM_CONTEXT_CASE_WHEN:
5672
5613
  return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
5673
5614
  case PM_CONTEXT_CASE_IN:
@@ -6103,16 +6044,21 @@ static pm_token_type_t
6103
6044
  lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6104
6045
  // Lex as far as we can into the current identifier.
6105
6046
  size_t width;
6106
- while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
6107
- parser->current.end += width;
6047
+ const uint8_t *end = parser->end;
6048
+ const uint8_t *current_start = parser->current.start;
6049
+ const uint8_t *current_end = parser->current.end;
6050
+
6051
+ while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6052
+ current_end += width;
6108
6053
  }
6054
+ parser->current.end = current_end;
6109
6055
 
6110
6056
  // Now cache the length of the identifier so that we can quickly compare it
6111
6057
  // against known keywords.
6112
- width = (size_t) (parser->current.end - parser->current.start);
6058
+ width = (size_t) (current_end - current_start);
6113
6059
 
6114
- if (parser->current.end < parser->end) {
6115
- if (((parser->current.end + 1 >= parser->end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
6060
+ if (current_end < end) {
6061
+ if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
6116
6062
  // First we'll attempt to extend the identifier by a ! or ?. Then we'll
6117
6063
  // check if we're returning the defined? keyword or just an identifier.
6118
6064
  width++;
@@ -6222,7 +6168,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6222
6168
  }
6223
6169
  }
6224
6170
 
6225
- return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6171
+ if (parser->encoding_changed) {
6172
+ return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6173
+ }
6174
+ return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
6226
6175
  }
6227
6176
 
6228
6177
  // Returns true if the current token that the parser is considering is at the
@@ -6423,24 +6372,24 @@ escape_byte(uint8_t value, const uint8_t flags) {
6423
6372
  static inline void
6424
6373
  escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
6425
6374
  if (value <= 0x7F) { // 0xxxxxxx
6426
- pm_buffer_append_u8(buffer, (uint8_t) value);
6375
+ pm_buffer_append_byte(buffer, (uint8_t) value);
6427
6376
  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
6428
- pm_buffer_append_u8(buffer, (uint8_t) (0xC0 | (value >> 6)));
6429
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6377
+ pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
6378
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6430
6379
  } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
6431
- pm_buffer_append_u8(buffer, (uint8_t) (0xE0 | (value >> 12)));
6432
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6433
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6380
+ pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
6381
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6382
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6434
6383
  } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
6435
- pm_buffer_append_u8(buffer, (uint8_t) (0xF0 | (value >> 18)));
6436
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
6437
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6438
- pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6384
+ pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
6385
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
6386
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
6387
+ pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
6439
6388
  } else {
6440
6389
  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
6441
- pm_buffer_append_u8(buffer, 0xEF);
6442
- pm_buffer_append_u8(buffer, 0xBF);
6443
- pm_buffer_append_u8(buffer, 0xBD);
6390
+ pm_buffer_append_byte(buffer, 0xEF);
6391
+ pm_buffer_append_byte(buffer, 0xBF);
6392
+ pm_buffer_append_byte(buffer, 0xBD);
6444
6393
  }
6445
6394
  }
6446
6395
 
@@ -6466,18 +6415,18 @@ escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
6466
6415
  uint8_t byte2 = (uint8_t) (byte & 0xF);
6467
6416
 
6468
6417
  if (byte1 >= 0xA) {
6469
- pm_buffer_append_u8(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
6418
+ pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
6470
6419
  } else {
6471
- pm_buffer_append_u8(buffer, (uint8_t) (byte1 + '0'));
6420
+ pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
6472
6421
  }
6473
6422
 
6474
6423
  if (byte2 >= 0xA) {
6475
- pm_buffer_append_u8(buffer, (uint8_t) (byte2 - 0xA + 'A'));
6424
+ pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
6476
6425
  } else {
6477
- pm_buffer_append_u8(buffer, (uint8_t) (byte2 + '0'));
6426
+ pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
6478
6427
  }
6479
6428
  } else {
6480
- pm_buffer_append_u8(buffer, byte);
6429
+ pm_buffer_append_byte(buffer, byte);
6481
6430
  }
6482
6431
  }
6483
6432
 
@@ -6487,57 +6436,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6487
6436
  switch (peek(parser)) {
6488
6437
  case '\\': {
6489
6438
  parser->current.end++;
6490
- pm_buffer_append_u8(buffer, '\\');
6439
+ pm_buffer_append_byte(buffer, '\\');
6491
6440
  return;
6492
6441
  }
6493
6442
  case '\'': {
6494
6443
  parser->current.end++;
6495
- pm_buffer_append_u8(buffer, '\'');
6444
+ pm_buffer_append_byte(buffer, '\'');
6496
6445
  return;
6497
6446
  }
6498
6447
  case 'a': {
6499
6448
  parser->current.end++;
6500
- pm_buffer_append_u8(buffer, '\a');
6449
+ pm_buffer_append_byte(buffer, '\a');
6501
6450
  return;
6502
6451
  }
6503
6452
  case 'b': {
6504
6453
  parser->current.end++;
6505
- pm_buffer_append_u8(buffer, '\b');
6454
+ pm_buffer_append_byte(buffer, '\b');
6506
6455
  return;
6507
6456
  }
6508
6457
  case 'e': {
6509
6458
  parser->current.end++;
6510
- pm_buffer_append_u8(buffer, '\033');
6459
+ pm_buffer_append_byte(buffer, '\033');
6511
6460
  return;
6512
6461
  }
6513
6462
  case 'f': {
6514
6463
  parser->current.end++;
6515
- pm_buffer_append_u8(buffer, '\f');
6464
+ pm_buffer_append_byte(buffer, '\f');
6516
6465
  return;
6517
6466
  }
6518
6467
  case 'n': {
6519
6468
  parser->current.end++;
6520
- pm_buffer_append_u8(buffer, '\n');
6469
+ pm_buffer_append_byte(buffer, '\n');
6521
6470
  return;
6522
6471
  }
6523
6472
  case 'r': {
6524
6473
  parser->current.end++;
6525
- pm_buffer_append_u8(buffer, '\r');
6474
+ pm_buffer_append_byte(buffer, '\r');
6526
6475
  return;
6527
6476
  }
6528
6477
  case 's': {
6529
6478
  parser->current.end++;
6530
- pm_buffer_append_u8(buffer, ' ');
6479
+ pm_buffer_append_byte(buffer, ' ');
6531
6480
  return;
6532
6481
  }
6533
6482
  case 't': {
6534
6483
  parser->current.end++;
6535
- pm_buffer_append_u8(buffer, '\t');
6484
+ pm_buffer_append_byte(buffer, '\t');
6536
6485
  return;
6537
6486
  }
6538
6487
  case 'v': {
6539
6488
  parser->current.end++;
6540
- pm_buffer_append_u8(buffer, '\v');
6489
+ pm_buffer_append_byte(buffer, '\v');
6541
6490
  return;
6542
6491
  }
6543
6492
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -6554,7 +6503,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6554
6503
  }
6555
6504
  }
6556
6505
 
6557
- pm_buffer_append_u8(buffer, value);
6506
+ pm_buffer_append_byte(buffer, value);
6558
6507
  return;
6559
6508
  }
6560
6509
  case 'x': {
@@ -6576,7 +6525,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6576
6525
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
6577
6526
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
6578
6527
  } else {
6579
- pm_buffer_append_u8(buffer, value);
6528
+ pm_buffer_append_byte(buffer, value);
6580
6529
  }
6581
6530
  } else {
6582
6531
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
@@ -6770,14 +6719,14 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
6770
6719
  case '\r': {
6771
6720
  if (peek_offset(parser, 1) == '\n') {
6772
6721
  parser->current.end += 2;
6773
- pm_buffer_append_u8(buffer, '\n');
6722
+ pm_buffer_append_byte(buffer, '\n');
6774
6723
  return;
6775
6724
  }
6776
6725
  }
6777
6726
  /* fallthrough */
6778
6727
  default: {
6779
6728
  if (parser->current.end < parser->end) {
6780
- pm_buffer_append_u8(buffer, *parser->current.end++);
6729
+ pm_buffer_append_byte(buffer, *parser->current.end++);
6781
6730
  }
6782
6731
  return;
6783
6732
  }
@@ -7031,7 +6980,7 @@ typedef struct {
7031
6980
  // Push the given byte into the token buffer.
7032
6981
  static inline void
7033
6982
  pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
7034
- pm_buffer_append_u8(&token_buffer->buffer, byte);
6983
+ pm_buffer_append_byte(&token_buffer->buffer, byte);
7035
6984
  }
7036
6985
 
7037
6986
  // When we're about to return from lexing the current token and we know for sure
@@ -8328,7 +8277,7 @@ parser_lex(pm_parser_t *parser) {
8328
8277
 
8329
8278
  // If we haven't found an escape yet, then this buffer will be
8330
8279
  // unallocated since we can refer directly to the source string.
8331
- pm_token_buffer_t token_buffer = { 0 };
8280
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8332
8281
 
8333
8282
  while (breakpoint != NULL) {
8334
8283
  // If we hit a null byte, skip directly past it.
@@ -8504,7 +8453,7 @@ parser_lex(pm_parser_t *parser) {
8504
8453
  // characters.
8505
8454
  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
8506
8455
  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
8507
- pm_token_buffer_t token_buffer = { 0 };
8456
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8508
8457
 
8509
8458
  while (breakpoint != NULL) {
8510
8459
  // If we hit a null byte, skip directly past it.
@@ -8693,7 +8642,7 @@ parser_lex(pm_parser_t *parser) {
8693
8642
 
8694
8643
  // If we haven't found an escape yet, then this buffer will be
8695
8644
  // unallocated since we can refer directly to the source string.
8696
- pm_token_buffer_t token_buffer = { 0 };
8645
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8697
8646
 
8698
8647
  while (breakpoint != NULL) {
8699
8648
  // If we hit the incrementor, then we'll increment then nesting and
@@ -8954,7 +8903,7 @@ parser_lex(pm_parser_t *parser) {
8954
8903
  }
8955
8904
 
8956
8905
  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
8957
- pm_token_buffer_t token_buffer = { 0 };
8906
+ pm_token_buffer_t token_buffer = { { 0 }, 0 };
8958
8907
  bool was_escaped_newline = false;
8959
8908
 
8960
8909
  while (breakpoint != NULL) {
@@ -9572,10 +9521,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
9572
9521
  splat->expression = parse_target(parser, splat->expression);
9573
9522
  }
9574
9523
 
9575
- pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
9576
- pm_multi_target_node_targets_append(multi_target, (pm_node_t *) splat);
9577
-
9578
- return (pm_node_t *) multi_target;
9524
+ return (pm_node_t *) splat;
9579
9525
  }
9580
9526
  case PM_CALL_NODE: {
9581
9527
  pm_call_node_t *call = (pm_call_node_t *) target;
@@ -9651,7 +9597,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
9651
9597
  }
9652
9598
  }
9653
9599
 
9654
- // Parse a write targets and validate that it is in a valid position for
9600
+ // Parse a write target and validate that it is in a valid position for
9655
9601
  // assignment.
9656
9602
  static pm_node_t *
9657
9603
  parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
@@ -9722,7 +9668,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
9722
9668
  }
9723
9669
 
9724
9670
  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
9725
- pm_multi_target_node_targets_append(multi_target, (pm_node_t *) splat);
9671
+ pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
9726
9672
 
9727
9673
  return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
9728
9674
  }
@@ -9838,7 +9784,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
9838
9784
  bool has_splat = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
9839
9785
 
9840
9786
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
9841
- pm_multi_target_node_targets_append(result, parse_target(parser, first_target));
9787
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
9842
9788
 
9843
9789
  while (accept1(parser, PM_TOKEN_COMMA)) {
9844
9790
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -9858,19 +9804,19 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
9858
9804
  }
9859
9805
 
9860
9806
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
9861
- pm_multi_target_node_targets_append(result, splat);
9807
+ pm_multi_target_node_targets_append(parser, result, splat);
9862
9808
  has_splat = true;
9863
9809
  } else if (token_begins_expression_p(parser->current.type)) {
9864
9810
  pm_node_t *target = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
9865
9811
  target = parse_target(parser, target);
9866
9812
 
9867
- pm_multi_target_node_targets_append(result, target);
9868
- } else {
9813
+ pm_multi_target_node_targets_append(parser, result, target);
9814
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
9869
9815
  // If we get here, then we have a trailing , in a multi target node.
9870
9816
  // We need to indicate this somehow in the tree, so we'll add an
9871
9817
  // anonymous splat.
9872
9818
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
9873
- pm_multi_target_node_targets_append(result, splat);
9819
+ pm_multi_target_node_targets_append(parser, result, splat);
9874
9820
  break;
9875
9821
  }
9876
9822
  }
@@ -9963,9 +9909,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
9963
9909
  }
9964
9910
 
9965
9911
  // Parse all of the elements of a hash.
9966
- static void
9912
+ // Returns true if a double splat was found
9913
+ static bool
9967
9914
  parse_assocs(pm_parser_t *parser, pm_node_t *node) {
9968
9915
  assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
9916
+ bool contains_keyword_splat = false;
9969
9917
 
9970
9918
  while (true) {
9971
9919
  pm_node_t *element;
@@ -9983,6 +9931,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
9983
9931
  }
9984
9932
 
9985
9933
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
9934
+ contains_keyword_splat = true;
9986
9935
  break;
9987
9936
  }
9988
9937
  case PM_TOKEN_LABEL: {
@@ -10041,7 +9990,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10041
9990
  }
10042
9991
 
10043
9992
  // If there's no comma after the element, then we're done.
10044
- if (!accept1(parser, PM_TOKEN_COMMA)) return;
9993
+ if (!accept1(parser, PM_TOKEN_COMMA)) break;
10045
9994
 
10046
9995
  // If the next element starts with a label or a **, then we know we have
10047
9996
  // another element in the hash, so we'll continue parsing.
@@ -10052,8 +10001,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10052
10001
  if (token_begins_expression_p(parser->current.type)) continue;
10053
10002
 
10054
10003
  // Otherwise by default we will exit out of this loop.
10055
- return;
10004
+ break;
10056
10005
  }
10006
+ return contains_keyword_splat;
10057
10007
  }
10058
10008
 
10059
10009
  // Append an argument to a list of arguments.
@@ -10101,12 +10051,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10101
10051
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
10102
10052
  argument = (pm_node_t *) hash;
10103
10053
 
10054
+ bool contains_keyword_splat = false;
10104
10055
  if (!match7(parser, terminator, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
10105
- parse_assocs(parser, (pm_node_t *) hash);
10056
+ contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
10106
10057
  }
10107
10058
 
10108
10059
  parsed_bare_hash = true;
10109
10060
  parse_arguments_append(parser, arguments, argument);
10061
+ if (contains_keyword_splat) {
10062
+ arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
10063
+ }
10110
10064
  break;
10111
10065
  }
10112
10066
  case PM_TOKEN_UAMPERSAND: {
@@ -10180,6 +10134,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10180
10134
  argument = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10181
10135
  }
10182
10136
 
10137
+ bool contains_keyword_splat = false;
10183
10138
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
10184
10139
  if (parsed_bare_hash) {
10185
10140
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -10206,13 +10161,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10206
10161
  token_begins_expression_p(parser->current.type) ||
10207
10162
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
10208
10163
  )) {
10209
- parse_assocs(parser, (pm_node_t *) bare_hash);
10164
+ contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
10210
10165
  }
10211
10166
 
10212
10167
  parsed_bare_hash = true;
10213
10168
  }
10214
10169
 
10215
10170
  parse_arguments_append(parser, arguments, argument);
10171
+ if (contains_keyword_splat) {
10172
+ arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
10173
+ }
10216
10174
  break;
10217
10175
  }
10218
10176
  }
@@ -10248,34 +10206,27 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10248
10206
  // end
10249
10207
  //
10250
10208
  // It can recurse infinitely down, and splats are allowed to group arguments.
10251
- static pm_required_destructured_parameter_node_t *
10209
+ static pm_multi_target_node_t *
10252
10210
  parse_required_destructured_parameter(pm_parser_t *parser) {
10253
10211
  expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
10254
10212
 
10255
- pm_token_t opening = parser->previous;
10256
- pm_required_destructured_parameter_node_t *node = pm_required_destructured_parameter_node_create(parser, &opening);
10257
- bool parsed_splat = false;
10213
+ pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
10214
+ pm_multi_target_node_opening_set(node, &parser->previous);
10258
10215
 
10259
10216
  do {
10260
10217
  pm_node_t *param;
10261
10218
 
10262
- if (node->parameters.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
10263
- if (parsed_splat) {
10264
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
10265
- }
10266
-
10219
+ // If we get here then we have a trailing comma. In this case we'll
10220
+ // create an implicit splat node.
10221
+ if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
10267
10222
  param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
10268
- pm_required_destructured_parameter_node_append_parameter(node, param);
10223
+ pm_multi_target_node_targets_append(parser, node, param);
10269
10224
  break;
10270
10225
  }
10271
10226
 
10272
10227
  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
10273
10228
  param = (pm_node_t *) parse_required_destructured_parameter(parser);
10274
10229
  } else if (accept1(parser, PM_TOKEN_USTAR)) {
10275
- if (parsed_splat) {
10276
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
10277
- }
10278
-
10279
10230
  pm_token_t star = parser->previous;
10280
10231
  pm_node_t *value = NULL;
10281
10232
 
@@ -10287,7 +10238,6 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
10287
10238
  }
10288
10239
 
10289
10240
  param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
10290
- parsed_splat = true;
10291
10241
  } else {
10292
10242
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
10293
10243
  pm_token_t name = parser->previous;
@@ -10297,11 +10247,11 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
10297
10247
  pm_parser_local_add_token(parser, &name);
10298
10248
  }
10299
10249
 
10300
- pm_required_destructured_parameter_node_append_parameter(node, param);
10250
+ pm_multi_target_node_targets_append(parser, node, param);
10301
10251
  } while (accept1(parser, PM_TOKEN_COMMA));
10302
10252
 
10303
10253
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
10304
- pm_required_destructured_parameter_node_closing_set(node, &parser->previous);
10254
+ pm_multi_target_node_closing_set(node, &parser->previous);
10305
10255
 
10306
10256
  return node;
10307
10257
  }
@@ -11787,28 +11737,40 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
11787
11737
  // Parse a hash pattern.
11788
11738
  static pm_hash_pattern_node_t *
11789
11739
  parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
11790
- if (PM_NODE_TYPE_P(first_assoc, PM_ASSOC_NODE)) {
11791
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11792
- // Here we have a value for the first assoc in the list, so we will parse it
11793
- // now and update the first assoc.
11794
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
11795
-
11796
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
11797
- assoc->base.location.end = value->location.end;
11798
- assoc->value = value;
11799
- } else {
11800
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
11740
+ pm_node_list_t assocs = PM_EMPTY_NODE_LIST;
11741
+ pm_node_t *rest = NULL;
11801
11742
 
11802
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
11803
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
11804
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
11743
+ switch (PM_NODE_TYPE(first_assoc)) {
11744
+ case PM_ASSOC_NODE: {
11745
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11746
+ // Here we have a value for the first assoc in the list, so we will
11747
+ // parse it now and update the first assoc.
11748
+ pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
11749
+
11750
+ pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
11751
+ assoc->base.location.end = value->location.end;
11752
+ assoc->value = value;
11753
+ } else {
11754
+ pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
11755
+
11756
+ if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
11757
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
11758
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
11759
+ }
11805
11760
  }
11761
+
11762
+ pm_node_list_append(&assocs, first_assoc);
11763
+ break;
11806
11764
  }
11765
+ case PM_ASSOC_SPLAT_NODE:
11766
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
11767
+ rest = first_assoc;
11768
+ break;
11769
+ default:
11770
+ assert(false);
11771
+ break;
11807
11772
  }
11808
11773
 
11809
- pm_node_list_t assocs = PM_EMPTY_NODE_LIST;
11810
- pm_node_list_append(&assocs, first_assoc);
11811
-
11812
11774
  // If there are any other assocs, then we'll parse them now.
11813
11775
  while (accept1(parser, PM_TOKEN_COMMA)) {
11814
11776
  // Here we need to break to support trailing commas.
@@ -11839,7 +11801,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
11839
11801
  pm_node_list_append(&assocs, assoc);
11840
11802
  }
11841
11803
 
11842
- pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs);
11804
+ pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
11843
11805
  free(assocs.nodes);
11844
11806
 
11845
11807
  return node;
@@ -11924,32 +11886,45 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
11924
11886
  // pattern node.
11925
11887
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
11926
11888
  } else {
11927
- pm_node_t *key;
11889
+ pm_node_t *first_assoc;
11928
11890
 
11929
11891
  switch (parser->current.type) {
11930
- case PM_TOKEN_LABEL:
11892
+ case PM_TOKEN_LABEL: {
11931
11893
  parser_lex(parser);
11932
- key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
11894
+
11895
+ pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
11896
+ pm_token_t operator = not_provided(parser);
11897
+
11898
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
11933
11899
  break;
11900
+ }
11934
11901
  case PM_TOKEN_USTAR_STAR:
11935
- key = parse_pattern_keyword_rest(parser);
11902
+ first_assoc = parse_pattern_keyword_rest(parser);
11936
11903
  break;
11937
- case PM_TOKEN_STRING_BEGIN:
11938
- key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
11904
+ case PM_TOKEN_STRING_BEGIN: {
11905
+ pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
11906
+ pm_token_t operator = not_provided(parser);
11907
+
11939
11908
  if (!pm_symbol_node_label_p(key)) {
11940
11909
  pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
11941
11910
  }
11942
11911
 
11912
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
11943
11913
  break;
11944
- default:
11914
+ }
11915
+ default: {
11945
11916
  parser_lex(parser);
11946
11917
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
11947
- key = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
11918
+
11919
+ pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
11920
+ pm_token_t operator = not_provided(parser);
11921
+
11922
+ first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
11948
11923
  break;
11924
+ }
11949
11925
  }
11950
11926
 
11951
- pm_token_t operator = not_provided(parser);
11952
- node = parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
11927
+ node = parse_pattern_hash(parser, first_assoc);
11953
11928
 
11954
11929
  accept1(parser, PM_TOKEN_NEWLINE);
11955
11930
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -12577,16 +12552,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
12577
12552
  parser_lex(parser);
12578
12553
  pm_accepts_block_stack_pop(parser);
12579
12554
 
12580
- // If we have a single statement and are ending on a right
12581
- // parenthesis, then we need to check if this is possibly a
12582
- // multiple target node.
12583
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
12555
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
12556
+ // If we have a single statement and are ending on a right
12557
+ // parenthesis, then we need to check if this is possibly a
12558
+ // multiple target node.
12584
12559
  pm_multi_target_node_t *multi_target;
12585
- if (((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
12560
+
12561
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
12586
12562
  multi_target = (pm_multi_target_node_t *) statement;
12587
12563
  } else {
12588
12564
  multi_target = pm_multi_target_node_create(parser);
12589
- pm_multi_target_node_targets_append(multi_target, statement);
12565
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
12590
12566
  }
12591
12567
 
12592
12568
  pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
@@ -12598,10 +12574,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
12598
12574
  multi_target->base.location.end = rparen_loc.end;
12599
12575
 
12600
12576
  if (match1(parser, PM_TOKEN_COMMA)) {
12601
- return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
12602
- } else {
12603
- return parse_target_validate(parser, (pm_node_t *) multi_target);
12577
+ if (binding_power == PM_BINDING_POWER_STATEMENT) {
12578
+ return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
12579
+ }
12580
+ return (pm_node_t *) multi_target;
12604
12581
  }
12582
+
12583
+ return parse_target_validate(parser, (pm_node_t *) multi_target);
12605
12584
  }
12606
12585
 
12607
12586
  // If we have a single statement and are ending on a right parenthesis
@@ -13688,7 +13667,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13688
13667
  parser_lex(parser);
13689
13668
  pm_token_t for_keyword = parser->previous;
13690
13669
  pm_node_t *index;
13670
+
13691
13671
  pm_parser_scope_push_transparent(parser);
13672
+ context_push(parser, PM_CONTEXT_FOR_INDEX);
13692
13673
 
13693
13674
  // First, parse out the first index expression.
13694
13675
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13714,6 +13695,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13714
13695
  index = parse_target(parser, index);
13715
13696
  }
13716
13697
 
13698
+ context_pop(parser);
13717
13699
  pm_parser_scope_pop(parser);
13718
13700
  pm_do_loop_stack_push(parser, true);
13719
13701
 
@@ -14596,6 +14578,50 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
14596
14578
  }
14597
14579
  }
14598
14580
 
14581
+ // Potentially change a =~ with a regular expression with named captures into a
14582
+ // match write node.
14583
+ static pm_node_t *
14584
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
14585
+ pm_string_list_t named_captures;
14586
+ pm_string_list_init(&named_captures);
14587
+
14588
+ pm_node_t *result;
14589
+ if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
14590
+ pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
14591
+
14592
+ for (size_t index = 0; index < named_captures.length; index++) {
14593
+ pm_string_t *name = &named_captures.strings[index];
14594
+ pm_constant_id_t local;
14595
+
14596
+ if (content->type == PM_STRING_SHARED) {
14597
+ // If the unescaped string is a slice of the source,
14598
+ // then we can copy the names directly. The pointers
14599
+ // will line up.
14600
+ local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
14601
+ } else {
14602
+ // Otherwise, the name is a slice of the malloc-ed
14603
+ // owned string, in which case we need to copy it
14604
+ // out into a new string.
14605
+ size_t length = pm_string_length(name);
14606
+
14607
+ void *memory = malloc(length);
14608
+ memcpy(memory, pm_string_source(name), length);
14609
+
14610
+ local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
14611
+ }
14612
+
14613
+ pm_constant_id_list_append(&match->locals, local);
14614
+ }
14615
+
14616
+ result = (pm_node_t *) match;
14617
+ } else {
14618
+ result = (pm_node_t *) call;
14619
+ }
14620
+
14621
+ pm_string_list_free(&named_captures);
14622
+ return result;
14623
+ }
14624
+
14599
14625
  static inline pm_node_t *
14600
14626
  parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
14601
14627
  pm_token_t token = parser->current;
@@ -14620,18 +14646,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
14620
14646
  return parse_write(parser, node, &token, value);
14621
14647
  }
14622
14648
  case PM_SPLAT_NODE: {
14623
- pm_splat_node_t *splat_node = (pm_splat_node_t *) node;
14649
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
14650
+ pm_multi_target_node_targets_append(parser, multi_target, node);
14624
14651
 
14625
- switch (PM_NODE_TYPE(splat_node->expression)) {
14626
- case PM_CASE_WRITABLE:
14627
- parser_lex(parser);
14628
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
14629
- return parse_write(parser, (pm_node_t *) splat_node, &token, value);
14630
- default:
14631
- break;
14632
- }
14652
+ parser_lex(parser);
14653
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
14654
+ return parse_write(parser, (pm_node_t *) multi_target, &token, value);
14633
14655
  }
14634
- /* fallthrough */
14635
14656
  default:
14636
14657
  parser_lex(parser);
14637
14658
 
@@ -15026,42 +15047,51 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15026
15047
  // If the receiver of this =~ is a regular expression node, then we
15027
15048
  // need to introduce local variables for it based on its named
15028
15049
  // capture groups.
15029
- if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
15030
- pm_string_list_t named_captures;
15031
- pm_string_list_init(&named_captures);
15032
-
15033
- const pm_string_t *unescaped = &((pm_regular_expression_node_t *) node)->unescaped;
15034
- if (pm_regexp_named_capture_group_names(pm_string_source(unescaped), pm_string_length(unescaped), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
15035
- pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
15036
-
15037
- for (size_t index = 0; index < named_captures.length; index++) {
15038
- pm_string_t *name = &named_captures.strings[index];
15039
- pm_constant_id_t local;
15040
-
15041
- if (unescaped->type == PM_STRING_SHARED) {
15042
- // If the unescaped string is a slice of the source,
15043
- // then we can copy the names directly. The pointers
15044
- // will line up.
15045
- local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
15046
- } else {
15047
- // Otherwise, the name is a slice of the malloc-ed
15048
- // owned string, in which case we need to copy it
15049
- // out into a new string.
15050
- size_t length = pm_string_length(name);
15050
+ if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
15051
+ // It's possible to have an interpolated regular expression node
15052
+ // that only contains strings. This is because it can be split
15053
+ // up by a heredoc. In this case we need to concat the unescaped
15054
+ // strings together and then parse them as a regular expression.
15055
+ pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
15051
15056
 
15052
- void *memory = malloc(length);
15053
- memcpy(memory, pm_string_source(name), length);
15057
+ bool interpolated = false;
15058
+ size_t total_length = 0;
15054
15059
 
15055
- local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
15056
- }
15060
+ for (size_t index = 0; index < parts->size; index++) {
15061
+ pm_node_t *part = parts->nodes[index];
15057
15062
 
15058
- pm_constant_id_list_append(&match->locals, local);
15063
+ if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
15064
+ total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
15065
+ } else {
15066
+ interpolated = true;
15067
+ break;
15059
15068
  }
15060
-
15061
- result = (pm_node_t *) match;
15062
15069
  }
15063
15070
 
15064
- pm_string_list_free(&named_captures);
15071
+ if (!interpolated) {
15072
+ void *memory = malloc(total_length);
15073
+ if (!memory) abort();
15074
+
15075
+ uint8_t *cursor = memory;
15076
+ for (size_t index = 0; index < parts->size; index++) {
15077
+ pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
15078
+ size_t length = pm_string_length(unescaped);
15079
+
15080
+ memcpy(cursor, pm_string_source(unescaped), length);
15081
+ cursor += length;
15082
+ }
15083
+
15084
+ pm_string_t owned;
15085
+ pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
15086
+
15087
+ result = parse_regular_expression_named_captures(parser, &owned, call);
15088
+ pm_string_free(&owned);
15089
+ }
15090
+ } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
15091
+ // If we have a regular expression node, then we can just parse
15092
+ // the named captures directly off the unescaped string.
15093
+ const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
15094
+ result = parse_regular_expression_named_captures(parser, content, call);
15065
15095
  }
15066
15096
 
15067
15097
  return result;
@@ -15667,16 +15697,20 @@ pm_parse(pm_parser_t *parser) {
15667
15697
  return parse_program(parser);
15668
15698
  }
15669
15699
 
15700
+ static inline void
15701
+ pm_serialize_header(pm_buffer_t *buffer) {
15702
+ pm_buffer_append_string(buffer, "PRISM", 5);
15703
+ pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
15704
+ pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
15705
+ pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
15706
+ pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
15707
+ }
15708
+
15670
15709
  PRISM_EXPORTED_FUNCTION void
15671
15710
  pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
15672
- pm_buffer_append_str(buffer, "PRISM", 5);
15673
- pm_buffer_append_u8(buffer, PRISM_VERSION_MAJOR);
15674
- pm_buffer_append_u8(buffer, PRISM_VERSION_MINOR);
15675
- pm_buffer_append_u8(buffer, PRISM_VERSION_PATCH);
15676
- pm_buffer_append_u8(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
15677
-
15711
+ pm_serialize_header(buffer);
15678
15712
  pm_serialize_content(parser, node, buffer);
15679
- pm_buffer_append_str(buffer, "\0", 1);
15713
+ pm_buffer_append_string(buffer, "\0", 1);
15680
15714
  }
15681
15715
 
15682
15716
  // Parse and serialize the AST represented by the given source to the given
@@ -15688,7 +15722,26 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
15688
15722
  if (metadata) pm_parser_metadata(&parser, metadata);
15689
15723
 
15690
15724
  pm_node_t *node = pm_parse(&parser);
15691
- pm_serialize(&parser, node, buffer);
15725
+
15726
+ pm_serialize_header(buffer);
15727
+ pm_serialize_content(&parser, node, buffer);
15728
+ pm_buffer_append_byte(buffer, '\0');
15729
+
15730
+ pm_node_destroy(&parser, node);
15731
+ pm_parser_free(&parser);
15732
+ }
15733
+
15734
+ // Parse and serialize the comments in the given source to the given buffer.
15735
+ PRISM_EXPORTED_FUNCTION void
15736
+ pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
15737
+ pm_parser_t parser;
15738
+ pm_parser_init(&parser, source, size, NULL);
15739
+ if (metadata) pm_parser_metadata(&parser, metadata);
15740
+
15741
+ pm_node_t *node = pm_parse(&parser);
15742
+ pm_serialize_header(buffer);
15743
+ pm_serialize_encoding(&parser.encoding, buffer);
15744
+ pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
15692
15745
 
15693
15746
  pm_node_destroy(&parser, node);
15694
15747
  pm_parser_free(&parser);