prism 0.28.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
data/src/prism.c CHANGED
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
749
749
  return scope;
750
750
  }
751
751
 
752
- static void
753
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
754
760
  pm_scope_t *scope = parser->current_scope;
755
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
756
764
  if (scope->parameters & mask) {
757
- if (!scope->closed) {
758
- pm_parser_err_token(parser, token, diag);
759
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
760
771
  }
761
- return;
772
+
773
+ conflict = true;
762
774
  }
775
+
763
776
  if (scope->closed) break;
764
777
  scope = scope->previous;
765
778
  }
766
779
 
767
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
768
781
  }
769
782
 
770
- static inline void
783
+ static void
771
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
772
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
773
796
  }
774
797
 
775
- static inline void
798
+ static void
776
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
777
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
778
811
  }
779
812
 
780
- static inline void
781
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
782
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
783
828
  }
784
829
 
785
- static inline void
830
+ static void
786
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
787
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
788
843
  }
789
844
 
790
845
  /**
@@ -1703,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1703
1758
  * it's important that it be as fast as possible.
1704
1759
  */
1705
1760
  static inline size_t
1706
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1761
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1707
1762
  if (parser->encoding_changed) {
1708
1763
  size_t width;
1709
1764
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -3025,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3025
3080
  .message_loc = target->message_loc,
3026
3081
  .read_name = 0,
3027
3082
  .write_name = target->name,
3028
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3029
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3083
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3084
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3030
3085
  .value = value
3031
3086
  };
3032
3087
 
@@ -3064,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3064
3119
  .arguments = target->arguments,
3065
3120
  .closing_loc = target->closing_loc,
3066
3121
  .block = target->block,
3067
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3068
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3122
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3123
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3069
3124
  .value = value
3070
3125
  };
3071
3126
 
@@ -3409,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3409
3464
  },
3410
3465
  .name = target->name,
3411
3466
  .name_loc = target->base.location,
3412
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3467
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3413
3468
  .value = value,
3414
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3469
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3415
3470
  };
3416
3471
 
3417
3472
  return node;
@@ -3525,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3525
3580
  }
3526
3581
  },
3527
3582
  .target = target,
3528
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3529
3584
  .value = value,
3530
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3531
3586
  };
3532
3587
 
3533
3588
  return node;
@@ -3652,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3652
3707
  },
3653
3708
  .name = target->name,
3654
3709
  .name_loc = target->base.location,
3655
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3710
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3656
3711
  .value = value,
3657
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3712
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3658
3713
  };
3659
3714
 
3660
3715
  return node;
@@ -4505,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4505
4560
  },
4506
4561
  .name = pm_global_variable_write_name(parser, target),
4507
4562
  .name_loc = target->location,
4508
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4563
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4509
4564
  .value = value,
4510
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4565
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4511
4566
  };
4512
4567
 
4513
4568
  return node;
@@ -5013,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
5013
5068
  },
5014
5069
  .name = target->name,
5015
5070
  .name_loc = target->base.location,
5016
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5071
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5017
5072
  .value = value,
5018
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5073
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5019
5074
  };
5020
5075
 
5021
5076
  return node;
@@ -5609,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5609
5664
  }
5610
5665
  },
5611
5666
  .name_loc = target->location,
5612
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5667
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5613
5668
  .value = value,
5614
5669
  .name = name,
5615
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5670
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5616
5671
  .depth = depth
5617
5672
  };
5618
5673
 
@@ -6891,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6891
6946
  case PM_REDO_NODE:
6892
6947
  case PM_RETRY_NODE:
6893
6948
  case PM_RETURN_NODE:
6894
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
6949
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6895
6950
  break;
6896
6951
  default:
6897
6952
  break;
@@ -8339,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8339
8394
  // If we have hit a ractor pragma, attempt to lex that.
8340
8395
  uint32_t value_length = (uint32_t) (value_end - value_start);
8341
8396
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8342
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8397
+ const uint8_t *cursor = parser->current.start;
8398
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8399
+
8400
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8401
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8402
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8343
8403
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8344
8404
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8345
8405
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8796,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8796
8856
  type = lex_optional_float_suffix(parser, seen_e);
8797
8857
  }
8798
8858
 
8859
+ // At this point we have a completed number, but we want to provide the user
8860
+ // with a good experience if they put an additional .xxx fractional
8861
+ // component on the end, so we'll check for that here.
8862
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8863
+ const uint8_t *fraction_start = parser->current.end;
8864
+ const uint8_t *fraction_end = parser->current.end + 2;
8865
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8866
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8867
+ }
8868
+
8799
8869
  return type;
8800
8870
  }
8801
8871
 
@@ -9297,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
9297
9367
  * validated.
9298
9368
  */
9299
9369
  static inline uint32_t
9300
- escape_unicode(const uint8_t *string, size_t length) {
9370
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9301
9371
  uint32_t value = 0;
9302
9372
  for (size_t index = 0; index < length; index++) {
9303
9373
  if (index != 0) value <<= 4;
9304
9374
  value |= escape_hexadecimal_digit(string[index]);
9305
9375
  }
9376
+
9377
+ // Here we're going to verify that the value is actually a valid Unicode
9378
+ // codepoint and not a lone surrogate (U+D800 through U+DFFF).
9379
+ if (value >= 0xD800 && value <= 0xDFFF) {
9380
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9381
+ return 0xFFFD;
9382
+ }
9383
+
9306
9384
  return value;
9307
9385
  }
9308
9386
 
@@ -9551,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9551
9629
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9552
9630
  }
9553
9631
 
9554
- escape_write_byte_encoded(parser, buffer, value);
9632
+ escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9555
9633
  } else {
9556
9634
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9557
9635
  }
@@ -9590,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9590
9668
  extra_codepoints_start = unicode_start;
9591
9669
  }
9592
9670
 
9593
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9671
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9594
9672
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9595
9673
 
9596
9674
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9615,7 +9693,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9615
9693
  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
9694
 
9617
9695
  if (length == 4) {
9618
- uint32_t value = escape_unicode(parser->current.end, 4);
9696
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9619
9697
 
9620
9698
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
9699
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9629,6 +9707,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9629
9707
  }
9630
9708
  }
9631
9709
 
9710
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
+ pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9712
+ }
9713
+
9632
9714
  return;
9633
9715
  }
9634
9716
  case 'c': {
@@ -14036,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14036
14118
  * Check if current parameter follows valid parameters ordering. If not it adds
14037
14119
  * an error to the list without stopping the parsing, otherwise sets the
14038
14120
  * parameters state to the one corresponding to the current parameter.
14121
+ *
14122
+ * It returns true if it was successful, and false otherwise.
14039
14123
  */
14040
- static void
14124
+ static bool
14041
14125
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14042
14126
  pm_parameters_order_t state = parameters_ordering[token->type];
14043
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14127
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
14044
14128
 
14045
14129
  // If we see another ordered argument after an optional argument
14046
14130
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14047
14131
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14048
14132
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14049
- return;
14133
+ return true;
14050
14134
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14051
- return;
14135
+ return true;
14052
14136
  }
14053
14137
 
14054
14138
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14055
14139
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14056
- }
14057
-
14058
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14140
+ return false;
14141
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14142
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14143
+ return false;
14144
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14059
14145
  // We know what transition we failed on, so we can provide a better error here.
14060
14146
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14061
- } else if (state < *current) {
14062
- *current = state;
14147
+ return false;
14063
14148
  }
14149
+
14150
+ if (state < *current) *current = state;
14151
+ return true;
14064
14152
  }
14065
14153
 
14066
14154
  /**
@@ -14129,27 +14217,22 @@ parse_parameters(
14129
14217
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14130
14218
  }
14131
14219
 
14132
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14133
- update_parameter_state(parser, &parser->current, &order);
14134
- parser_lex(parser);
14220
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14221
+ parser_lex(parser);
14135
14222
 
14136
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14223
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14224
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14137
14225
 
14138
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14139
- if (params->keyword_rest != NULL) {
14140
- // If we already have a keyword rest parameter, then we replace it with the
14141
- // forwarding parameter and move the keyword rest parameter to the posts list.
14142
- pm_node_t *keyword_rest = params->keyword_rest;
14143
- pm_parameters_node_posts_append(params, keyword_rest);
14144
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14145
- params->keyword_rest = NULL;
14146
- }
14147
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
14148
- } else {
14149
- update_parameter_state(parser, &parser->current, &order);
14150
- parser_lex(parser);
14226
+ if (params->keyword_rest != NULL) {
14227
+ // If we already have a keyword rest parameter, then we replace it with the
14228
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14229
+ pm_node_t *keyword_rest = params->keyword_rest;
14230
+ pm_parameters_node_posts_append(params, keyword_rest);
14231
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14232
+ params->keyword_rest = NULL;
14151
14233
  }
14152
14234
 
14235
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14153
14236
  break;
14154
14237
  }
14155
14238
  case PM_TOKEN_CLASS_VARIABLE:
@@ -14244,6 +14327,12 @@ parse_parameters(
14244
14327
  pm_token_t local = name;
14245
14328
  local.end -= 1;
14246
14329
 
14330
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14331
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14332
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14333
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14334
+ }
14335
+
14247
14336
  bool repeated = pm_parser_parameter_name_check(parser, &local);
14248
14337
  pm_parser_local_add_token(parser, &local, 1);
14249
14338
 
@@ -14808,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14808
14897
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14809
14898
  } else {
14810
14899
  pm_accepts_block_stack_push(parser, true);
14811
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
14900
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14812
14901
 
14813
14902
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14814
14903
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14826,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14826
14915
  // If we get here, then the subsequent token cannot be used as an infix
14827
14916
  // operator. In this case we assume the subsequent token is part of an
14828
14917
  // argument to this method call.
14829
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
14918
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14830
14919
 
14831
14920
  // If we are done with the arguments and have still not consumed the comma,
14832
14921
  // then we have a trailing comma where we need to check whether it is
@@ -14857,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14857
14946
  if (arguments->block == NULL && !arguments->has_forwarding) {
14858
14947
  arguments->block = (pm_node_t *) block;
14859
14948
  } else {
14860
- if (arguments->has_forwarding) {
14861
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14862
- } else {
14863
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14864
- }
14949
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14950
+
14865
14951
  if (arguments->block != NULL) {
14866
14952
  if (arguments->arguments == NULL) {
14867
14953
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15846,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15846
15932
  nodes->size = write_index;
15847
15933
  }
15848
15934
 
15935
+ #define PM_PARSE_PATTERN_SINGLE 0
15936
+ #define PM_PARSE_PATTERN_TOP 1
15937
+ #define PM_PARSE_PATTERN_MULTI 2
15938
+
15849
15939
  static pm_node_t *
15850
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15940
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
15851
15941
 
15852
15942
  /**
15853
15943
  * Add the newly created local to the list of captures for this pattern matching
@@ -15895,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15895
15985
  accept1(parser, PM_TOKEN_NEWLINE);
15896
15986
 
15897
15987
  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15898
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15988
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15899
15989
  accept1(parser, PM_TOKEN_NEWLINE);
15900
15990
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15901
15991
  }
@@ -15907,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15907
15997
  accept1(parser, PM_TOKEN_NEWLINE);
15908
15998
 
15909
15999
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15910
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16000
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15911
16001
  accept1(parser, PM_TOKEN_NEWLINE);
15912
16002
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15913
16003
  }
@@ -16055,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16055
16145
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16056
16146
  }
16057
16147
 
16148
+ /**
16149
+ * Check that the slice of the source given by the bounds parameters constitutes
16150
+ * a valid local variable name.
16151
+ */
16152
+ static bool
16153
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16154
+ ptrdiff_t length = end - start;
16155
+ if (length == 0) return false;
16156
+
16157
+ // First ensure that it starts with a valid identifier starting character.
16158
+ size_t width = char_is_identifier_start(parser, start);
16159
+ if (width == 0) return false;
16160
+
16161
+ // Next, ensure that it's not an uppercase character.
16162
+ if (parser->encoding_changed) {
16163
+ if (parser->encoding->isupper_char(start, length)) return false;
16164
+ } else {
16165
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16166
+ }
16167
+
16168
+ // Next, iterate through all of the bytes of the string to ensure that they
16169
+ // are all valid identifier characters.
16170
+ const uint8_t *cursor = start + width;
16171
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16172
+ return cursor == end;
16173
+ }
16174
+
16058
16175
  /**
16059
16176
  * Create an implicit node for the value of a hash pattern that has omitted the
16060
16177
  * value. This will use an implicit local variable target.
@@ -16062,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16062
16179
  static pm_node_t *
16063
16180
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16064
16181
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16065
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16066
16182
 
16183
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16067
16184
  int depth = -1;
16068
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
16069
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16070
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16071
- } else {
16185
+
16186
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16072
16187
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16188
+ } else {
16189
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16190
+
16191
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16192
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16193
+ }
16073
16194
  }
16074
16195
 
16075
16196
  if (depth == -1) {
@@ -16124,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16124
16245
  } else {
16125
16246
  // Here we have a value for the first assoc in the list, so
16126
16247
  // we will parse it now.
16127
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16248
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16128
16249
  }
16129
16250
 
16130
16251
  pm_token_t operator = not_provided(parser);
@@ -16139,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16139
16260
  // If we get anything else, then this is an error. For this we'll
16140
16261
  // create a missing node for the value and create an assoc node for
16141
16262
  // the first node in the list.
16142
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16263
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16264
+ pm_parser_err_node(parser, first_node, diag_id);
16143
16265
 
16144
16266
  pm_token_t operator = not_provided(parser);
16145
16267
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -16176,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16176
16298
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16177
16299
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16178
16300
  } else {
16179
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16301
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16180
16302
  }
16181
16303
 
16182
16304
  pm_token_t operator = not_provided(parser);
@@ -16233,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16233
16355
 
16234
16356
  // Otherwise, we'll parse the inner pattern, then deal with it depending
16235
16357
  // on the type it returns.
16236
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16358
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16237
16359
 
16238
16360
  accept1(parser, PM_TOKEN_NEWLINE);
16239
16361
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -16300,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16300
16422
  first_node = parse_pattern_keyword_rest(parser, captures);
16301
16423
  break;
16302
16424
  case PM_TOKEN_STRING_BEGIN:
16303
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16425
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
16304
16426
  break;
16305
16427
  default: {
16428
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16306
16429
  parser_lex(parser);
16307
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
16308
16430
 
16309
16431
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16310
16432
  break;
@@ -16506,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16506
16628
  pm_token_t opening = parser->current;
16507
16629
  parser_lex(parser);
16508
16630
 
16509
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16631
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16510
16632
  accept1(parser, PM_TOKEN_NEWLINE);
16511
16633
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16512
16634
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16565,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16565
16687
  * Parse a pattern matching expression.
16566
16688
  */
16567
16689
  static pm_node_t *
16568
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
16690
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16569
16691
  pm_node_t *node = NULL;
16570
16692
 
16571
16693
  bool leading_rest = false;
@@ -16575,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16575
16697
  case PM_TOKEN_LABEL: {
16576
16698
  parser_lex(parser);
16577
16699
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16578
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
16700
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
16701
+
16702
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16703
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16704
+ }
16705
+
16706
+ return node;
16579
16707
  }
16580
16708
  case PM_TOKEN_USTAR_STAR: {
16581
16709
  node = parse_pattern_keyword_rest(parser, captures);
16582
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16710
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
16711
+
16712
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16713
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16714
+ }
16715
+
16716
+ return node;
16583
16717
  }
16584
16718
  case PM_TOKEN_USTAR: {
16585
- if (top_pattern) {
16719
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16586
16720
  parser_lex(parser);
16587
16721
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16588
16722
  leading_rest = true;
@@ -16601,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16601
16735
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16602
16736
  }
16603
16737
 
16604
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
16738
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16605
16739
  // If we have a comma, then we are now parsing either an array pattern or a
16606
16740
  // find pattern. We need to parse all of the patterns, put them into a big
16607
16741
  // list, and then determine which type of node we have.
@@ -16912,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16912
17046
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16913
17047
  break;
16914
17048
  }
17049
+ case PM_ERR_HASH_VALUE:
17050
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17051
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17052
+ break;
17053
+ }
16915
17054
  case PM_ERR_UNARY_RECEIVER: {
16916
17055
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16917
17056
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -17887,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17887
18026
  pm_token_t in_keyword = parser->previous;
17888
18027
 
17889
18028
  pm_constant_id_list_t captures = { 0 };
17890
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18029
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17891
18030
 
17892
18031
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17893
18032
  pm_constant_id_list_free(&captures);
@@ -17916,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17916
18055
  then_keyword = not_provided(parser);
17917
18056
  }
17918
18057
  } else {
17919
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18058
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17920
18059
  then_keyword = parser->previous;
17921
18060
  }
17922
18061
 
@@ -18370,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18370
18509
  lex_state_set(parser, PM_LEX_STATE_BEG);
18371
18510
  parser->command_start = true;
18372
18511
 
18373
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18512
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18513
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18514
+ parser->previous.start = parser->previous.end;
18515
+ parser->previous.type = PM_TOKEN_MISSING;
18516
+ }
18517
+
18374
18518
  rparen = parser->previous;
18375
18519
  break;
18376
18520
  }
@@ -19219,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19219
19363
  pm_token_t opening = not_provided(parser);
19220
19364
  pm_token_t closing = not_provided(parser);
19221
19365
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19366
+
19367
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19368
+ // This is extremely strange, but the first string part of a
19369
+ // regular expression will always be tagged as binary if we
19370
+ // are in a US-ASCII file, no matter its contents.
19371
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19372
+ }
19373
+
19222
19374
  pm_interpolated_regular_expression_node_append(interpolated, part);
19223
19375
  } else {
19224
19376
  // If the first part of the body of the regular expression is not a
@@ -19692,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19692
19844
  }
19693
19845
  }
19694
19846
 
19695
- /**
19696
- * Returns true if the name of the capture group is a valid local variable that
19697
- * can be written to.
19698
- */
19699
- static bool
19700
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19701
- if (length == 0) {
19702
- return false;
19703
- }
19704
-
19705
- // First ensure that it starts with a valid identifier starting character.
19706
- size_t width = char_is_identifier_start(parser, source);
19707
- if (!width) {
19708
- return false;
19709
- }
19710
-
19711
- // Next, ensure that it's not an uppercase character.
19712
- if (parser->encoding_changed) {
19713
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19714
- } else {
19715
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19716
- }
19717
-
19718
- // Next, iterate through all of the bytes of the string to ensure that they
19719
- // are all valid identifier characters.
19720
- const uint8_t *cursor = source + width;
19721
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19722
- cursor += width;
19723
- }
19724
-
19725
- return cursor == source + length;
19726
- }
19727
-
19728
19847
  /**
19729
19848
  * Potentially change a =~ with a regular expression with named captures into a
19730
19849
  * match write node.
@@ -19751,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
19751
19870
 
19752
19871
  // If the name of the capture group isn't a valid identifier, we do
19753
19872
  // not add it to the local table.
19754
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
19873
+ if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
19755
19874
 
19756
19875
  if (content->type == PM_STRING_SHARED) {
19757
19876
  // If the unescaped string is a slice of the source, then we can
@@ -20209,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20209
20328
  // In this case we have an operator but we don't know what it's for.
20210
20329
  // We need to treat it as an error. For now, we'll mark it as an error
20211
20330
  // and just skip right past it.
20212
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20331
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
20213
20332
  return node;
20214
20333
  }
20215
20334
  }
@@ -20591,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20591
20710
  parser_lex(parser);
20592
20711
 
20593
20712
  pm_constant_id_list_t captures = { 0 };
20594
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20713
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20595
20714
 
20596
20715
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20597
20716
  pm_constant_id_list_free(&captures);
@@ -20608,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20608
20727
  parser_lex(parser);
20609
20728
 
20610
20729
  pm_constant_id_list_t captures = { 0 };
20611
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20730
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20612
20731
 
20613
20732
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20614
20733
  pm_constant_id_list_free(&captures);
@@ -20621,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20621
20740
  }
20622
20741
  }
20623
20742
 
20743
+ #undef PM_PARSE_PATTERN_SINGLE
20744
+ #undef PM_PARSE_PATTERN_TOP
20745
+ #undef PM_PARSE_PATTERN_MULTI
20746
+
20624
20747
  /**
20625
20748
  * Parse an expression at the given point of the parser using the given binding
20626
20749
  * power to parse subsequent chains. If this function finds a syntax error, it