prism 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/src/prism.c CHANGED
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
749
749
  return scope;
750
750
  }
751
751
 
752
- static void
753
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
754
760
  pm_scope_t *scope = parser->current_scope;
755
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
756
764
  if (scope->parameters & mask) {
757
- if (!scope->closed) {
758
- pm_parser_err_token(parser, token, diag);
759
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
760
771
  }
761
- return;
772
+
773
+ conflict = true;
762
774
  }
775
+
763
776
  if (scope->closed) break;
764
777
  scope = scope->previous;
765
778
  }
766
779
 
767
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
768
781
  }
769
782
 
770
- static inline void
783
+ static void
771
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
772
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
773
796
  }
774
797
 
775
- static inline void
798
+ static void
776
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
777
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
778
811
  }
779
812
 
780
- static inline void
781
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
782
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
783
828
  }
784
829
 
785
- static inline void
830
+ static void
786
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
787
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
788
843
  }
789
844
 
790
845
  /**
@@ -1703,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1703
1758
  * it's important that it be as fast as possible.
1704
1759
  */
1705
1760
  static inline size_t
1706
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1761
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1707
1762
  if (parser->encoding_changed) {
1708
1763
  size_t width;
1709
1764
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -3025,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3025
3080
  .message_loc = target->message_loc,
3026
3081
  .read_name = 0,
3027
3082
  .write_name = target->name,
3028
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3029
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3083
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3084
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3030
3085
  .value = value
3031
3086
  };
3032
3087
 
@@ -3064,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3064
3119
  .arguments = target->arguments,
3065
3120
  .closing_loc = target->closing_loc,
3066
3121
  .block = target->block,
3067
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3068
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3122
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3123
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3069
3124
  .value = value
3070
3125
  };
3071
3126
 
@@ -3409,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3409
3464
  },
3410
3465
  .name = target->name,
3411
3466
  .name_loc = target->base.location,
3412
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3467
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3413
3468
  .value = value,
3414
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3469
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3415
3470
  };
3416
3471
 
3417
3472
  return node;
@@ -3525,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3525
3580
  }
3526
3581
  },
3527
3582
  .target = target,
3528
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3529
3584
  .value = value,
3530
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3531
3586
  };
3532
3587
 
3533
3588
  return node;
@@ -3652,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3652
3707
  },
3653
3708
  .name = target->name,
3654
3709
  .name_loc = target->base.location,
3655
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3710
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3656
3711
  .value = value,
3657
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3712
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3658
3713
  };
3659
3714
 
3660
3715
  return node;
@@ -4505,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4505
4560
  },
4506
4561
  .name = pm_global_variable_write_name(parser, target),
4507
4562
  .name_loc = target->location,
4508
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4563
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4509
4564
  .value = value,
4510
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4565
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4511
4566
  };
4512
4567
 
4513
4568
  return node;
@@ -5013,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
5013
5068
  },
5014
5069
  .name = target->name,
5015
5070
  .name_loc = target->base.location,
5016
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5071
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5017
5072
  .value = value,
5018
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5073
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5019
5074
  };
5020
5075
 
5021
5076
  return node;
@@ -5609,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5609
5664
  }
5610
5665
  },
5611
5666
  .name_loc = target->location,
5612
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5667
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5613
5668
  .value = value,
5614
5669
  .name = name,
5615
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5670
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5616
5671
  .depth = depth
5617
5672
  };
5618
5673
 
@@ -6891,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6891
6946
  case PM_REDO_NODE:
6892
6947
  case PM_RETRY_NODE:
6893
6948
  case PM_RETURN_NODE:
6894
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
6949
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6895
6950
  break;
6896
6951
  default:
6897
6952
  break;
@@ -8339,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8339
8394
  // If we have hit a ractor pragma, attempt to lex that.
8340
8395
  uint32_t value_length = (uint32_t) (value_end - value_start);
8341
8396
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8342
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8397
+ const uint8_t *cursor = parser->current.start;
8398
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8399
+
8400
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8401
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8402
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8343
8403
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8344
8404
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8345
8405
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8796,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8796
8856
  type = lex_optional_float_suffix(parser, seen_e);
8797
8857
  }
8798
8858
 
8859
+ // At this point we have a completed number, but we want to provide the user
8860
+ // with a good experience if they put an additional .xxx fractional
8861
+ // component on the end, so we'll check for that here.
8862
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8863
+ const uint8_t *fraction_start = parser->current.end;
8864
+ const uint8_t *fraction_end = parser->current.end + 2;
8865
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8866
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8867
+ }
8868
+
8799
8869
  return type;
8800
8870
  }
8801
8871
 
@@ -9297,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
9297
9367
  * validated.
9298
9368
  */
9299
9369
  static inline uint32_t
9300
- escape_unicode(const uint8_t *string, size_t length) {
9370
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9301
9371
  uint32_t value = 0;
9302
9372
  for (size_t index = 0; index < length; index++) {
9303
9373
  if (index != 0) value <<= 4;
9304
9374
  value |= escape_hexadecimal_digit(string[index]);
9305
9375
  }
9376
+
9377
+ // Here we're going to verify that the value is actually a valid Unicode
9378
+ // codepoint and not a surrogate pair.
9379
+ if (value >= 0xD800 && value <= 0xDFFF) {
9380
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9381
+ return 0xFFFD;
9382
+ }
9383
+
9306
9384
  return value;
9307
9385
  }
9308
9386
 
@@ -9551,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9551
9629
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9552
9630
  }
9553
9631
 
9554
- escape_write_byte_encoded(parser, buffer, value);
9632
+ escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9555
9633
  } else {
9556
9634
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9557
9635
  }
@@ -9590,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9590
9668
  extra_codepoints_start = unicode_start;
9591
9669
  }
9592
9670
 
9593
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9671
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9594
9672
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9595
9673
 
9596
9674
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9615,7 +9693,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9615
9693
  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
9694
 
9617
9695
  if (length == 4) {
9618
- uint32_t value = escape_unicode(parser->current.end, 4);
9696
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9619
9697
 
9620
9698
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
9699
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9629,6 +9707,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9629
9707
  }
9630
9708
  }
9631
9709
 
9710
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
+ pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9712
+ }
9713
+
9632
9714
  return;
9633
9715
  }
9634
9716
  case 'c': {
@@ -14036,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14036
14118
  * Check if current parameter follows valid parameters ordering. If not it adds
14037
14119
  * an error to the list without stopping the parsing, otherwise sets the
14038
14120
  * parameters state to the one corresponding to the current parameter.
14121
+ *
14122
+ * It returns true if it was successful, and false otherwise.
14039
14123
  */
14040
- static void
14124
+ static bool
14041
14125
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14042
14126
  pm_parameters_order_t state = parameters_ordering[token->type];
14043
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14127
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
14044
14128
 
14045
14129
  // If we see another ordered argument after a optional argument
14046
14130
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14047
14131
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14048
14132
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14049
- return;
14133
+ return true;
14050
14134
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14051
- return;
14135
+ return true;
14052
14136
  }
14053
14137
 
14054
14138
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14055
14139
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14056
- }
14057
-
14058
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14140
+ return false;
14141
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14142
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14143
+ return false;
14144
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14059
14145
  // We know what transition we failed on, so we can provide a better error here.
14060
14146
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14061
- } else if (state < *current) {
14062
- *current = state;
14147
+ return false;
14063
14148
  }
14149
+
14150
+ if (state < *current) *current = state;
14151
+ return true;
14064
14152
  }
14065
14153
 
14066
14154
  /**
@@ -14129,27 +14217,22 @@ parse_parameters(
14129
14217
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14130
14218
  }
14131
14219
 
14132
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14133
- update_parameter_state(parser, &parser->current, &order);
14134
- parser_lex(parser);
14220
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14221
+ parser_lex(parser);
14135
14222
 
14136
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14223
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14224
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14137
14225
 
14138
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14139
- if (params->keyword_rest != NULL) {
14140
- // If we already have a keyword rest parameter, then we replace it with the
14141
- // forwarding parameter and move the keyword rest parameter to the posts list.
14142
- pm_node_t *keyword_rest = params->keyword_rest;
14143
- pm_parameters_node_posts_append(params, keyword_rest);
14144
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14145
- params->keyword_rest = NULL;
14146
- }
14147
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
14148
- } else {
14149
- update_parameter_state(parser, &parser->current, &order);
14150
- parser_lex(parser);
14226
+ if (params->keyword_rest != NULL) {
14227
+ // If we already have a keyword rest parameter, then we replace it with the
14228
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14229
+ pm_node_t *keyword_rest = params->keyword_rest;
14230
+ pm_parameters_node_posts_append(params, keyword_rest);
14231
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14232
+ params->keyword_rest = NULL;
14151
14233
  }
14152
14234
 
14235
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14153
14236
  break;
14154
14237
  }
14155
14238
  case PM_TOKEN_CLASS_VARIABLE:
@@ -14244,6 +14327,12 @@ parse_parameters(
14244
14327
  pm_token_t local = name;
14245
14328
  local.end -= 1;
14246
14329
 
14330
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14331
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14332
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14333
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14334
+ }
14335
+
14247
14336
  bool repeated = pm_parser_parameter_name_check(parser, &local);
14248
14337
  pm_parser_local_add_token(parser, &local, 1);
14249
14338
 
@@ -14808,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14808
14897
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14809
14898
  } else {
14810
14899
  pm_accepts_block_stack_push(parser, true);
14811
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
14900
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14812
14901
 
14813
14902
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14814
14903
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14826,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14826
14915
  // If we get here, then the subsequent token cannot be used as an infix
14827
14916
  // operator. In this case we assume the subsequent token is part of an
14828
14917
  // argument to this method call.
14829
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
14918
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14830
14919
 
14831
14920
  // If we have done with the arguments and still not consumed the comma,
14832
14921
  // then we have a trailing comma where we need to check whether it is
@@ -14857,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14857
14946
  if (arguments->block == NULL && !arguments->has_forwarding) {
14858
14947
  arguments->block = (pm_node_t *) block;
14859
14948
  } else {
14860
- if (arguments->has_forwarding) {
14861
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14862
- } else {
14863
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14864
- }
14949
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14950
+
14865
14951
  if (arguments->block != NULL) {
14866
14952
  if (arguments->arguments == NULL) {
14867
14953
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15846,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15846
15932
  nodes->size = write_index;
15847
15933
  }
15848
15934
 
15935
+ #define PM_PARSE_PATTERN_SINGLE 0
15936
+ #define PM_PARSE_PATTERN_TOP 1
15937
+ #define PM_PARSE_PATTERN_MULTI 2
15938
+
15849
15939
  static pm_node_t *
15850
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15940
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
15851
15941
 
15852
15942
  /**
15853
15943
  * Add the newly created local to the list of captures for this pattern matching
@@ -15895,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15895
15985
  accept1(parser, PM_TOKEN_NEWLINE);
15896
15986
 
15897
15987
  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15898
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15988
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15899
15989
  accept1(parser, PM_TOKEN_NEWLINE);
15900
15990
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15901
15991
  }
@@ -15907,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15907
15997
  accept1(parser, PM_TOKEN_NEWLINE);
15908
15998
 
15909
15999
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15910
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16000
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15911
16001
  accept1(parser, PM_TOKEN_NEWLINE);
15912
16002
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15913
16003
  }
@@ -16055,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16055
16145
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16056
16146
  }
16057
16147
 
16148
+ /**
16149
+ * Check that the slice of the source given by the bounds parameters constitutes
16150
+ * a valid local variable name.
16151
+ */
16152
+ static bool
16153
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16154
+ ptrdiff_t length = end - start;
16155
+ if (length == 0) return false;
16156
+
16157
+ // First ensure that it starts with a valid identifier starting character.
16158
+ size_t width = char_is_identifier_start(parser, start);
16159
+ if (width == 0) return false;
16160
+
16161
+ // Next, ensure that it's not an uppercase character.
16162
+ if (parser->encoding_changed) {
16163
+ if (parser->encoding->isupper_char(start, length)) return false;
16164
+ } else {
16165
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16166
+ }
16167
+
16168
+ // Next, iterate through all of the bytes of the string to ensure that they
16169
+ // are all valid identifier characters.
16170
+ const uint8_t *cursor = start + width;
16171
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16172
+ return cursor == end;
16173
+ }
16174
+
16058
16175
  /**
16059
16176
  * Create an implicit node for the value of a hash pattern that has omitted the
16060
16177
  * value. This will use an implicit local variable target.
@@ -16062,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16062
16179
  static pm_node_t *
16063
16180
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16064
16181
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16065
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16066
16182
 
16183
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16067
16184
  int depth = -1;
16068
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
16069
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16070
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16071
- } else {
16185
+
16186
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16072
16187
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16188
+ } else {
16189
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16190
+
16191
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16192
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16193
+ }
16073
16194
  }
16074
16195
 
16075
16196
  if (depth == -1) {
@@ -16124,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16124
16245
  } else {
16125
16246
  // Here we have a value for the first assoc in the list, so
16126
16247
  // we will parse it now.
16127
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16248
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16128
16249
  }
16129
16250
 
16130
16251
  pm_token_t operator = not_provided(parser);
@@ -16139,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16139
16260
  // If we get anything else, then this is an error. For this we'll
16140
16261
  // create a missing node for the value and create an assoc node for
16141
16262
  // the first node in the list.
16142
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16263
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16264
+ pm_parser_err_node(parser, first_node, diag_id);
16143
16265
 
16144
16266
  pm_token_t operator = not_provided(parser);
16145
16267
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -16176,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16176
16298
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16177
16299
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16178
16300
  } else {
16179
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16301
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16180
16302
  }
16181
16303
 
16182
16304
  pm_token_t operator = not_provided(parser);
@@ -16233,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16233
16355
 
16234
16356
  // Otherwise, we'll parse the inner pattern, then deal with it depending
16235
16357
  // on the type it returns.
16236
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16358
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16237
16359
 
16238
16360
  accept1(parser, PM_TOKEN_NEWLINE);
16239
16361
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -16300,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16300
16422
  first_node = parse_pattern_keyword_rest(parser, captures);
16301
16423
  break;
16302
16424
  case PM_TOKEN_STRING_BEGIN:
16303
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16425
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
16304
16426
  break;
16305
16427
  default: {
16428
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16306
16429
  parser_lex(parser);
16307
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
16308
16430
 
16309
16431
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16310
16432
  break;
@@ -16506,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16506
16628
  pm_token_t opening = parser->current;
16507
16629
  parser_lex(parser);
16508
16630
 
16509
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16631
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16510
16632
  accept1(parser, PM_TOKEN_NEWLINE);
16511
16633
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16512
16634
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16565,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16565
16687
  * Parse a pattern matching expression.
16566
16688
  */
16567
16689
  static pm_node_t *
16568
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
16690
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16569
16691
  pm_node_t *node = NULL;
16570
16692
 
16571
16693
  bool leading_rest = false;
@@ -16575,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16575
16697
  case PM_TOKEN_LABEL: {
16576
16698
  parser_lex(parser);
16577
16699
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16578
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
16700
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
16701
+
16702
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16703
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16704
+ }
16705
+
16706
+ return node;
16579
16707
  }
16580
16708
  case PM_TOKEN_USTAR_STAR: {
16581
16709
  node = parse_pattern_keyword_rest(parser, captures);
16582
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16710
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
16711
+
16712
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16713
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16714
+ }
16715
+
16716
+ return node;
16583
16717
  }
16584
16718
  case PM_TOKEN_USTAR: {
16585
- if (top_pattern) {
16719
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16586
16720
  parser_lex(parser);
16587
16721
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16588
16722
  leading_rest = true;
@@ -16601,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16601
16735
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16602
16736
  }
16603
16737
 
16604
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
16738
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16605
16739
  // If we have a comma, then we are now parsing either an array pattern or a
16606
16740
  // find pattern. We need to parse all of the patterns, put them into a big
16607
16741
  // list, and then determine which type of node we have.
@@ -16912,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16912
17046
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16913
17047
  break;
16914
17048
  }
17049
+ case PM_ERR_HASH_VALUE:
17050
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17051
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17052
+ break;
17053
+ }
16915
17054
  case PM_ERR_UNARY_RECEIVER: {
16916
17055
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16917
17056
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -17887,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17887
18026
  pm_token_t in_keyword = parser->previous;
17888
18027
 
17889
18028
  pm_constant_id_list_t captures = { 0 };
17890
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18029
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17891
18030
 
17892
18031
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17893
18032
  pm_constant_id_list_free(&captures);
@@ -17916,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17916
18055
  then_keyword = not_provided(parser);
17917
18056
  }
17918
18057
  } else {
17919
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18058
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17920
18059
  then_keyword = parser->previous;
17921
18060
  }
17922
18061
 
@@ -18370,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18370
18509
  lex_state_set(parser, PM_LEX_STATE_BEG);
18371
18510
  parser->command_start = true;
18372
18511
 
18373
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18512
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18513
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18514
+ parser->previous.start = parser->previous.end;
18515
+ parser->previous.type = PM_TOKEN_MISSING;
18516
+ }
18517
+
18374
18518
  rparen = parser->previous;
18375
18519
  break;
18376
18520
  }
@@ -19219,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19219
19363
  pm_token_t opening = not_provided(parser);
19220
19364
  pm_token_t closing = not_provided(parser);
19221
19365
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19366
+
19367
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19368
+ // This is extremely strange, but the first string part of a
19369
+ // regular expression will always be tagged as binary if we
19370
+ // are in a US-ASCII file, no matter its contents.
19371
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19372
+ }
19373
+
19222
19374
  pm_interpolated_regular_expression_node_append(interpolated, part);
19223
19375
  } else {
19224
19376
  // If the first part of the body of the regular expression is not a
@@ -19692,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19692
19844
  }
19693
19845
  }
19694
19846
 
19695
- /**
19696
- * Returns true if the name of the capture group is a valid local variable that
19697
- * can be written to.
19698
- */
19699
- static bool
19700
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19701
- if (length == 0) {
19702
- return false;
19703
- }
19704
-
19705
- // First ensure that it starts with a valid identifier starting character.
19706
- size_t width = char_is_identifier_start(parser, source);
19707
- if (!width) {
19708
- return false;
19709
- }
19710
-
19711
- // Next, ensure that it's not an uppercase character.
19712
- if (parser->encoding_changed) {
19713
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19714
- } else {
19715
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19716
- }
19717
-
19718
- // Next, iterate through all of the bytes of the string to ensure that they
19719
- // are all valid identifier characters.
19720
- const uint8_t *cursor = source + width;
19721
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19722
- cursor += width;
19723
- }
19724
-
19725
- return cursor == source + length;
19726
- }
19727
-
19728
19847
  /**
19729
19848
  * Potentially change a =~ with a regular expression with named captures into a
19730
19849
  * match write node.
@@ -19751,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
19751
19870
 
19752
19871
  // If the name of the capture group isn't a valid identifier, we do
19753
19872
  // not add it to the local table.
19754
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
19873
+ if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
19755
19874
 
19756
19875
  if (content->type == PM_STRING_SHARED) {
19757
19876
  // If the unescaped string is a slice of the source, then we can
@@ -20209,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20209
20328
  // In this case we have an operator but we don't know what it's for.
20210
20329
  // We need to treat it as an error. For now, we'll mark it as an error
20211
20330
  // and just skip right past it.
20212
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20331
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
20213
20332
  return node;
20214
20333
  }
20215
20334
  }
@@ -20591,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20591
20710
  parser_lex(parser);
20592
20711
 
20593
20712
  pm_constant_id_list_t captures = { 0 };
20594
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20713
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20595
20714
 
20596
20715
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20597
20716
  pm_constant_id_list_free(&captures);
@@ -20608,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20608
20727
  parser_lex(parser);
20609
20728
 
20610
20729
  pm_constant_id_list_t captures = { 0 };
20611
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20730
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20612
20731
 
20613
20732
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20614
20733
  pm_constant_id_list_free(&captures);
@@ -20621,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20621
20740
  }
20622
20741
  }
20623
20742
 
20743
+ #undef PM_PARSE_PATTERN_SINGLE
20744
+ #undef PM_PARSE_PATTERN_TOP
20745
+ #undef PM_PARSE_PATTERN_MULTI
20746
+
20624
20747
  /**
20625
20748
  * Parse an expression at the given point of the parser using the given binding
20626
20749
  * power to parse subsequent chains. If this function finds a syntax error, it