prism 0.28.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +95 -26
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +70 -52
  9. data/ext/prism/extconf.rb +27 -23
  10. data/ext/prism/extension.c +107 -372
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +170 -102
  13. data/include/prism/diagnostic.h +18 -3
  14. data/include/prism/node.h +0 -21
  15. data/include/prism/parser.h +23 -25
  16. data/include/prism/regexp.h +17 -8
  17. data/include/prism/static_literals.h +3 -2
  18. data/include/prism/util/pm_char.h +1 -2
  19. data/include/prism/util/pm_constant_pool.h +0 -8
  20. data/include/prism/util/pm_integer.h +16 -9
  21. data/include/prism/util/pm_string.h +0 -8
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +0 -11
  24. data/lib/prism/compiler.rb +3 -0
  25. data/lib/prism/desugar_compiler.rb +4 -4
  26. data/lib/prism/dispatcher.rb +14 -0
  27. data/lib/prism/dot_visitor.rb +54 -35
  28. data/lib/prism/dsl.rb +23 -18
  29. data/lib/prism/ffi.rb +25 -4
  30. data/lib/prism/inspect_visitor.rb +26 -24
  31. data/lib/prism/mutation_compiler.rb +6 -1
  32. data/lib/prism/node.rb +314 -389
  33. data/lib/prism/node_ext.rb +175 -17
  34. data/lib/prism/parse_result/comments.rb +1 -8
  35. data/lib/prism/parse_result/newlines.rb +102 -12
  36. data/lib/prism/parse_result.rb +17 -0
  37. data/lib/prism/reflection.rb +11 -9
  38. data/lib/prism/serialize.rb +91 -68
  39. data/lib/prism/translation/parser/compiler.rb +288 -138
  40. data/lib/prism/translation/parser.rb +7 -2
  41. data/lib/prism/translation/ripper.rb +24 -22
  42. data/lib/prism/translation/ruby_parser.rb +32 -14
  43. data/lib/prism/visitor.rb +3 -0
  44. data/lib/prism.rb +0 -4
  45. data/prism.gemspec +2 -4
  46. data/rbi/prism/node.rbi +114 -57
  47. data/rbi/prism/node_ext.rbi +5 -0
  48. data/rbi/prism/parse_result.rbi +1 -1
  49. data/rbi/prism/visitor.rbi +3 -0
  50. data/rbi/prism.rbi +6 -0
  51. data/sig/prism/dsl.rbs +13 -10
  52. data/sig/prism/lex_compat.rbs +10 -0
  53. data/sig/prism/mutation_compiler.rbs +1 -0
  54. data/sig/prism/node.rbs +72 -48
  55. data/sig/prism/node_ext.rbs +4 -0
  56. data/sig/prism/visitor.rbs +1 -0
  57. data/sig/prism.rbs +21 -0
  58. data/src/diagnostic.c +56 -27
  59. data/src/node.c +432 -1690
  60. data/src/prettyprint.c +97 -54
  61. data/src/prism.c +1286 -1196
  62. data/src/regexp.c +133 -68
  63. data/src/serialize.c +22 -17
  64. data/src/static_literals.c +63 -84
  65. data/src/token_type.c +4 -4
  66. data/src/util/pm_constant_pool.c +0 -8
  67. data/src/util/pm_integer.c +39 -11
  68. data/src/util/pm_string.c +0 -12
  69. data/src/util/pm_strpbrk.c +32 -6
  70. metadata +3 -5
  71. data/include/prism/util/pm_string_list.h +0 -44
  72. data/lib/prism/debug.rb +0 -249
  73. data/src/util/pm_string_list.c +0 -28
data/src/prism.c CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
423
423
  * This is the equivalent of IS_lex_state in CRuby.
424
424
  */
425
425
  static inline bool
426
- lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
426
+ lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
427
427
  return parser->lex_state & state;
428
428
  }
429
429
 
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
708
708
  .previous = parser->current_scope,
709
709
  .locals = { 0 },
710
710
  .parameters = PM_SCOPE_PARAMETERS_NONE,
711
- .numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_NONE,
711
+ .implicit_parameters = { 0 },
712
712
  .shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
713
713
  .closed = closed
714
714
  };
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
749
749
  return scope;
750
750
  }
751
751
 
752
- static void
753
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
754
760
  pm_scope_t *scope = parser->current_scope;
755
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
756
764
  if (scope->parameters & mask) {
757
- if (!scope->closed) {
758
- pm_parser_err_token(parser, token, diag);
759
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
760
771
  }
761
- return;
772
+
773
+ conflict = true;
762
774
  }
775
+
763
776
  if (scope->closed) break;
764
777
  scope = scope->previous;
765
778
  }
766
779
 
767
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
768
781
  }
769
782
 
770
- static inline void
783
+ static void
771
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
772
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
773
796
  }
774
797
 
775
- static inline void
798
+ static void
776
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
777
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
778
811
  }
779
812
 
780
- static inline void
781
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
782
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
783
828
  }
784
829
 
785
- static inline void
830
+ static void
786
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
787
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
788
843
  }
789
844
 
790
845
  /**
@@ -1128,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
1128
1183
  return NULL;
1129
1184
  case PM_BEGIN_NODE: {
1130
1185
  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
+
1187
+ if (cast->statements == NULL && cast->ensure_clause != NULL) {
1188
+ node = (pm_node_t *) cast->ensure_clause;
1189
+ }
1190
+ else {
1191
+ if (cast->rescue_clause != NULL) {
1192
+ if (cast->rescue_clause->statements == NULL) {
1193
+ return NULL;
1194
+ }
1195
+ else if (cast->else_clause != NULL) {
1196
+ node = (pm_node_t *) cast->else_clause;
1197
+ }
1198
+ else {
1199
+ node = (pm_node_t *) cast->statements;
1200
+ }
1201
+ }
1202
+ else {
1203
+ node = (pm_node_t *) cast->statements;
1204
+ }
1205
+ }
1206
+
1207
+ break;
1208
+ }
1209
+ case PM_ENSURE_NODE: {
1210
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1131
1211
  node = (pm_node_t *) cast->statements;
1132
1212
  break;
1133
1213
  }
@@ -1575,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
1575
1655
  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1576
1656
  }
1577
1657
 
1578
- #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
1658
+ #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
1579
1659
  #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
1580
1660
  #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
1581
1661
  #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
@@ -1703,7 +1783,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1703
1783
  * it's important that it be as fast as possible.
1704
1784
  */
1705
1785
  static inline size_t
1706
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1786
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1707
1787
  if (parser->encoding_changed) {
1708
1788
  size_t width;
1709
1789
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -2772,8 +2852,7 @@ static pm_call_node_t *
2772
2852
  pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2773
2853
  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2774
2854
 
2775
- node->base.location.start = parser->start;
2776
- node->base.location.end = parser->start;
2855
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
2777
2856
  node->arguments = arguments;
2778
2857
 
2779
2858
  node->name = name;
@@ -3025,8 +3104,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3025
3104
  .message_loc = target->message_loc,
3026
3105
  .read_name = 0,
3027
3106
  .write_name = target->name,
3028
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3029
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3107
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3108
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3030
3109
  .value = value
3031
3110
  };
3032
3111
 
@@ -3064,8 +3143,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3064
3143
  .arguments = target->arguments,
3065
3144
  .closing_loc = target->closing_loc,
3066
3145
  .block = target->block,
3067
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3068
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3146
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3147
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3069
3148
  .value = value
3070
3149
  };
3071
3150
 
@@ -3409,9 +3488,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3409
3488
  },
3410
3489
  .name = target->name,
3411
3490
  .name_loc = target->base.location,
3412
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3491
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3413
3492
  .value = value,
3414
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3493
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3415
3494
  };
3416
3495
 
3417
3496
  return node;
@@ -3525,9 +3604,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3525
3604
  }
3526
3605
  },
3527
3606
  .target = target,
3528
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3607
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3529
3608
  .value = value,
3530
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3609
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3531
3610
  };
3532
3611
 
3533
3612
  return node;
@@ -3652,9 +3731,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3652
3731
  },
3653
3732
  .name = target->name,
3654
3733
  .name_loc = target->base.location,
3655
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3734
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3656
3735
  .value = value,
3657
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3736
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3658
3737
  };
3659
3738
 
3660
3739
  return node;
@@ -4236,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4236
4315
  }
4237
4316
 
4238
4317
  /**
4239
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
4318
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
4240
4319
  */
4241
4320
  static pm_rational_node_t *
4242
4321
  pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4246,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4246
4325
  *node = (pm_rational_node_t) {
4247
4326
  {
4248
4327
  .type = PM_RATIONAL_NODE,
4249
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
4328
+ .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4250
4329
  .location = PM_LOCATION_TOKEN_VALUE(token)
4251
4330
  },
4252
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4253
- .type = PM_TOKEN_FLOAT,
4254
- .start = token->start,
4255
- .end = token->end - 1
4256
- }))
4331
+ .numerator = { 0 },
4332
+ .denominator = { 0 }
4257
4333
  };
4258
4334
 
4335
+ const uint8_t *start = token->start;
4336
+ const uint8_t *end = token->end - 1; // r
4337
+
4338
+ while (start < end && *start == '0') start++; // 0.1 -> .1
4339
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4340
+
4341
+ size_t length = (size_t) (end - start);
4342
+ if (length == 1) {
4343
+ node->denominator.value = 1;
4344
+ return node;
4345
+ }
4346
+
4347
+ const uint8_t *point = memchr(start, '.', length);
4348
+ assert(point && "should have a decimal point");
4349
+
4350
+ uint8_t *digits = malloc(length);
4351
+ if (digits == NULL) {
4352
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4353
+ abort();
4354
+ }
4355
+
4356
+ memcpy(digits, start, (unsigned long) (point - start));
4357
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4358
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4359
+
4360
+ digits[0] = '1';
4361
+ if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4362
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4363
+ free(digits);
4364
+
4365
+ pm_integers_reduce(&node->numerator, &node->denominator);
4259
4366
  return node;
4260
4367
  }
4261
4368
 
@@ -4505,9 +4612,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4505
4612
  },
4506
4613
  .name = pm_global_variable_write_name(parser, target),
4507
4614
  .name_loc = target->location,
4508
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4615
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4509
4616
  .value = value,
4510
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4617
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4511
4618
  };
4512
4619
 
4513
4620
  return node;
@@ -4566,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
4566
4673
  *node = (pm_global_variable_read_node_t) {
4567
4674
  {
4568
4675
  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4569
- .location = { .start = parser->start, .end = parser->start }
4676
+ .location = PM_LOCATION_NULL_VALUE(parser)
4570
4677
  },
4571
4678
  .name = name
4572
4679
  };
@@ -4608,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
4608
4715
  *node = (pm_global_variable_write_node_t) {
4609
4716
  {
4610
4717
  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4611
- .location = { .start = parser->start, .end = parser->start }
4718
+ .location = PM_LOCATION_NULL_VALUE(parser)
4612
4719
  },
4613
4720
  .name = name,
4614
- .name_loc = { .start = parser->start, .end = parser->start },
4615
- .operator_loc = { .start = parser->start, .end = parser->start },
4721
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
4722
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4616
4723
  .value = value
4617
4724
  };
4618
4725
 
@@ -4889,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
4889
4996
  }
4890
4997
 
4891
4998
  /**
4892
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
4999
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
4893
5000
  * token.
4894
5001
  */
4895
5002
  static pm_rational_node_t *
@@ -4900,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
4900
5007
  *node = (pm_rational_node_t) {
4901
5008
  {
4902
5009
  .type = PM_RATIONAL_NODE,
4903
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5010
+ .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4904
5011
  .location = PM_LOCATION_TOKEN_VALUE(token)
4905
5012
  },
4906
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4907
- .type = PM_TOKEN_INTEGER,
4908
- .start = token->start,
4909
- .end = token->end - 1
4910
- }))
5013
+ .numerator = { 0 },
5014
+ .denominator = { .value = 1, 0 }
4911
5015
  };
4912
5016
 
5017
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
5018
+ switch (base) {
5019
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
5020
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
5021
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
5022
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
5023
+ default: assert(false && "unreachable"); break;
5024
+ }
5025
+
5026
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
5027
+
4913
5028
  return node;
4914
5029
  }
4915
5030
 
@@ -5013,9 +5128,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
5013
5128
  },
5014
5129
  .name = target->name,
5015
5130
  .name_loc = target->base.location,
5016
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5131
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5017
5132
  .value = value,
5018
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5133
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5019
5134
  };
5020
5135
 
5021
5136
  return node;
@@ -5407,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
5407
5522
  node->base.location.end = closing->end;
5408
5523
  }
5409
5524
 
5525
+ /**
5526
+ * Create a local variable read that is reading the implicit 'it' variable.
5527
+ */
5528
+ static pm_it_local_variable_read_node_t *
5529
+ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5530
+ pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
5531
+
5532
+ *node = (pm_it_local_variable_read_node_t) {
5533
+ {
5534
+ .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5535
+ .location = PM_LOCATION_TOKEN_VALUE(name)
5536
+ }
5537
+ };
5538
+
5539
+ return node;
5540
+ }
5541
+
5410
5542
  /**
5411
5543
  * Allocate and initialize a new ItParametersNode node.
5412
5544
  */
@@ -5609,10 +5741,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5609
5741
  }
5610
5742
  },
5611
5743
  .name_loc = target->location,
5612
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5744
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5613
5745
  .value = value,
5614
5746
  .name = name,
5615
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5747
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5616
5748
  .depth = depth
5617
5749
  };
5618
5750
 
@@ -5719,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5719
5851
  return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5720
5852
  }
5721
5853
 
5722
- /**
5723
- * Returns true if the given node is `it` default parameter.
5724
- */
5725
- static inline bool
5726
- pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
5727
- // Check if it's a local variable reference
5728
- if (node->type != PM_CALL_NODE) {
5729
- return false;
5730
- }
5731
-
5732
- // Check if it's a variable call
5733
- pm_call_node_t *call_node = (pm_call_node_t *) node;
5734
- if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
5735
- return false;
5736
- }
5737
-
5738
- // Check if it's called `it`
5739
- pm_constant_id_t id = ((pm_call_node_t *)node)->name;
5740
- pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
5741
- return pm_token_is_it(constant->start, constant->start + constant->length);
5742
- }
5743
-
5744
5854
  /**
5745
5855
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
5746
5856
  * are of the form /^_\d$/).
@@ -6891,7 +7001,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6891
7001
  case PM_REDO_NODE:
6892
7002
  case PM_RETRY_NODE:
6893
7003
  case PM_RETURN_NODE:
6894
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
7004
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6895
7005
  break;
6896
7006
  default:
6897
7007
  break;
@@ -7300,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7300
7410
  {
7301
7411
  .type = PM_SYMBOL_NODE,
7302
7412
  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7303
- .location = { .start = parser->start, .end = parser->start }
7413
+ .location = PM_LOCATION_NULL_VALUE(parser)
7304
7414
  },
7305
- .value_loc = { .start = parser->start, .end = parser->start },
7415
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
7306
7416
  .unescaped = { 0 }
7307
7417
  };
7308
7418
 
@@ -7703,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7703
7813
  *node = (pm_while_node_t) {
7704
7814
  {
7705
7815
  .type = PM_WHILE_NODE,
7706
- .location = { .start = parser->start, .end = parser->start }
7816
+ .location = PM_LOCATION_NULL_VALUE(parser)
7707
7817
  },
7708
- .keyword_loc = { .start = parser->start, .end = parser->start },
7709
- .closing_loc = { .start = parser->start, .end = parser->start },
7818
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7819
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7710
7820
  .predicate = predicate,
7711
7821
  .statements = statements
7712
7822
  };
@@ -7861,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
7861
7971
  return constant_id;
7862
7972
  }
7863
7973
 
7864
- /**
7865
- * Create a local variable read that is reading the implicit 'it' variable.
7866
- */
7867
- static pm_local_variable_read_node_t *
7868
- pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
7869
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
7870
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
7871
- return NULL;
7872
- }
7873
-
7874
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
7875
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
7876
- return NULL;
7877
- }
7878
-
7879
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
7880
-
7881
- pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
7882
- pm_parser_local_add(parser, name_id, name->start, name->end, 0);
7883
-
7884
- return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
7885
- }
7886
-
7887
- /**
7888
- * Convert a `it` variable call node to a node for `it` default parameter.
7889
- */
7890
- static pm_node_t *
7891
- pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7892
- if (
7893
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7894
- !parser->current_scope->closed &&
7895
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7896
- pm_node_is_it(parser, node)
7897
- ) {
7898
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
7899
-
7900
- if (read != NULL) {
7901
- pm_node_destroy(parser, node);
7902
- node = (pm_node_t *) read;
7903
- }
7904
- }
7905
-
7906
- return node;
7907
- }
7908
-
7909
7974
  /**
7910
7975
  * Add a parameter name to the current scope and check whether the name of the
7911
7976
  * parameter is unique or not.
@@ -7941,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
7941
8006
  pm_scope_t *scope = parser->current_scope;
7942
8007
  parser->current_scope = scope->previous;
7943
8008
  pm_locals_free(&scope->locals);
8009
+ pm_node_list_free(&scope->implicit_parameters);
7944
8010
  xfree(scope);
7945
8011
  }
7946
8012
 
@@ -8012,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
8012
8078
  * is beyond the end of the source then return '\0'.
8013
8079
  */
8014
8080
  static inline uint8_t
8015
- peek_at(pm_parser_t *parser, const uint8_t *cursor) {
8081
+ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8016
8082
  if (cursor < parser->end) {
8017
8083
  return *cursor;
8018
8084
  } else {
@@ -8035,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8035
8101
  * that position is beyond the end of the source then return '\0'.
8036
8102
  */
8037
8103
  static inline uint8_t
8038
- peek(pm_parser_t *parser) {
8104
+ peek(const pm_parser_t *parser) {
8039
8105
  return peek_at(parser, parser->current.end);
8040
8106
  }
8041
8107
 
@@ -8100,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
8100
8166
  return memchr(cursor, '\n', (size_t) length);
8101
8167
  }
8102
8168
 
8169
+ /**
8170
+ * This is equivalent to the predicate of warn_balanced in CRuby.
8171
+ */
8172
+ static inline bool
8173
+ ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8174
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8175
+ }
8176
+
8103
8177
  /**
8104
8178
  * Here we're going to check if this is a "magic" comment, and perform whatever
8105
8179
  * actions are necessary for it here.
@@ -8339,7 +8413,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8339
8413
  // If we have hit a ractor pragma, attempt to lex that.
8340
8414
  uint32_t value_length = (uint32_t) (value_end - value_start);
8341
8415
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8342
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8416
+ const uint8_t *cursor = parser->current.start;
8417
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8418
+
8419
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8420
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8421
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8343
8422
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8344
8423
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8345
8424
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8796,6 +8875,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8796
8875
  type = lex_optional_float_suffix(parser, seen_e);
8797
8876
  }
8798
8877
 
8878
+ // At this point we have a completed number, but we want to provide the user
8879
+ // with a good experience if they put an additional .xxx fractional
8880
+ // component on the end, so we'll check for that here.
8881
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8882
+ const uint8_t *fraction_start = parser->current.end;
8883
+ const uint8_t *fraction_end = parser->current.end + 2;
8884
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8885
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8886
+ }
8887
+
8799
8888
  return type;
8800
8889
  }
8801
8890
 
@@ -8925,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
8925
9014
  // If we get here, then we have a $ followed by something that
8926
9015
  // isn't recognized as a global variable.
8927
9016
  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8928
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8929
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9017
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9018
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
8930
9019
  }
8931
9020
 
8932
9021
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9297,12 +9386,20 @@ escape_hexadecimal_digit(const uint8_t value) {
9297
9386
  * validated.
9298
9387
  */
9299
9388
  static inline uint32_t
9300
- escape_unicode(const uint8_t *string, size_t length) {
9389
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9301
9390
  uint32_t value = 0;
9302
9391
  for (size_t index = 0; index < length; index++) {
9303
9392
  if (index != 0) value <<= 4;
9304
9393
  value |= escape_hexadecimal_digit(string[index]);
9305
9394
  }
9395
+
9396
+ // Here we're going to verify that the value is actually a valid Unicode
9397
+ // codepoint and not a surrogate (U+D800 through U+DFFF).
9398
+ if (value >= 0xD800 && value <= 0xDFFF) {
9399
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9400
+ return 0xFFFD;
9401
+ }
9402
+
9306
9403
  return value;
9307
9404
  }
9308
9405
 
@@ -9311,7 +9408,7 @@ escape_unicode(const uint8_t *string, size_t length) {
9311
9408
  */
9312
9409
  static inline uint8_t
9313
9410
  escape_byte(uint8_t value, const uint8_t flags) {
9314
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
9411
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9315
9412
  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9316
9413
  return value;
9317
9414
  }
@@ -9411,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9411
9508
  static inline void
9412
9509
  escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9413
9510
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9414
- pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
9415
-
9416
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
9417
- uint8_t byte2 = (uint8_t) (byte & 0xF);
9418
-
9419
- if (byte1 >= 0xA) {
9420
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
9421
- } else {
9422
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
9423
- }
9424
-
9425
- if (byte2 >= 0xA) {
9426
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
9427
- } else {
9428
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
9429
- }
9511
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9430
9512
  }
9431
9513
 
9432
9514
  escape_write_byte_encoded(parser, buffer, byte);
@@ -9461,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9461
9543
  switch (peek(parser)) {
9462
9544
  case '\\': {
9463
9545
  parser->current.end++;
9464
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
9546
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9465
9547
  return;
9466
9548
  }
9467
9549
  case '\'': {
9468
9550
  parser->current.end++;
9469
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
9551
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9470
9552
  return;
9471
9553
  }
9472
9554
  case 'a': {
9473
9555
  parser->current.end++;
9474
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
9556
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9475
9557
  return;
9476
9558
  }
9477
9559
  case 'b': {
9478
9560
  parser->current.end++;
9479
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
9561
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9480
9562
  return;
9481
9563
  }
9482
9564
  case 'e': {
9483
9565
  parser->current.end++;
9484
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
9566
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9485
9567
  return;
9486
9568
  }
9487
9569
  case 'f': {
9488
9570
  parser->current.end++;
9489
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
9571
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9490
9572
  return;
9491
9573
  }
9492
9574
  case 'n': {
9493
9575
  parser->current.end++;
9494
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9576
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9495
9577
  return;
9496
9578
  }
9497
9579
  case 'r': {
9498
9580
  parser->current.end++;
9499
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
9581
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9500
9582
  return;
9501
9583
  }
9502
9584
  case 's': {
9503
9585
  parser->current.end++;
9504
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
9586
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9505
9587
  return;
9506
9588
  }
9507
9589
  case 't': {
9508
9590
  parser->current.end++;
9509
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
9591
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9510
9592
  return;
9511
9593
  }
9512
9594
  case 'v': {
9513
9595
  parser->current.end++;
9514
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
9596
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9515
9597
  return;
9516
9598
  }
9517
9599
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -9528,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9528
9610
  }
9529
9611
  }
9530
9612
 
9531
- escape_write_byte_encoded(parser, buffer, value);
9613
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9532
9614
  return;
9533
9615
  }
9534
9616
  case 'x': {
@@ -9547,8 +9629,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9547
9629
  parser->current.end++;
9548
9630
  }
9549
9631
 
9632
+ value = escape_byte(value, flags);
9550
9633
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9551
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9634
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9635
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9636
+ } else {
9637
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9638
+ }
9552
9639
  }
9553
9640
 
9554
9641
  escape_write_byte_encoded(parser, buffer, value);
@@ -9580,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9580
9667
  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9581
9668
  } else if (hexadecimal_length == 0) {
9582
9669
  // there are no hexadecimal characters
9583
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
9670
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9671
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9584
9672
  return;
9585
9673
  }
9586
9674
 
@@ -9590,7 +9678,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9590
9678
  extra_codepoints_start = unicode_start;
9591
9679
  }
9592
9680
 
9593
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9681
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9594
9682
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9595
9683
 
9596
9684
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9615,7 +9703,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9615
9703
  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
9704
 
9617
9705
  if (length == 4) {
9618
- uint32_t value = escape_unicode(parser->current.end, 4);
9706
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9619
9707
 
9620
9708
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
9709
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9651,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9651
9739
  return;
9652
9740
  }
9653
9741
  parser->current.end++;
9742
+
9743
+ if (match(parser, 'u') || match(parser, 'U')) {
9744
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9745
+ return;
9746
+ }
9747
+
9654
9748
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9655
9749
  return;
9656
9750
  case ' ':
@@ -9678,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9678
9772
  case 'C': {
9679
9773
  parser->current.end++;
9680
9774
  if (peek(parser) != '-') {
9681
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9775
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9776
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9682
9777
  return;
9683
9778
  }
9684
9779
 
@@ -9701,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9701
9796
  return;
9702
9797
  }
9703
9798
  parser->current.end++;
9799
+
9800
+ if (match(parser, 'u') || match(parser, 'U')) {
9801
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9802
+ return;
9803
+ }
9804
+
9704
9805
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9705
9806
  return;
9706
9807
  case ' ':
@@ -9715,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9715
9816
  return;
9716
9817
  default: {
9717
9818
  if (!char_is_ascii_printable(peeked)) {
9718
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9819
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9820
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9719
9821
  return;
9720
9822
  }
9721
9823
 
@@ -9728,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9728
9830
  case 'M': {
9729
9831
  parser->current.end++;
9730
9832
  if (peek(parser) != '-') {
9731
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9833
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9834
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9732
9835
  return;
9733
9836
  }
9734
9837
 
@@ -9746,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9746
9849
  return;
9747
9850
  }
9748
9851
  parser->current.end++;
9852
+
9853
+ if (match(parser, 'u') || match(parser, 'U')) {
9854
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9855
+ return;
9856
+ }
9857
+
9749
9858
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9750
9859
  return;
9751
9860
  case ' ':
@@ -9760,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9760
9869
  return;
9761
9870
  default:
9762
9871
  if (!char_is_ascii_printable(peeked)) {
9763
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9872
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9873
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9764
9874
  return;
9765
9875
  }
9766
9876
 
@@ -10721,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
10721
10831
  type = PM_TOKEN_USTAR_STAR;
10722
10832
  } else if (lex_state_beg_p(parser)) {
10723
10833
  type = PM_TOKEN_USTAR_STAR;
10834
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10835
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10724
10836
  }
10725
10837
 
10726
10838
  if (lex_state_operator_p(parser)) {
@@ -10744,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
10744
10856
  type = PM_TOKEN_USTAR;
10745
10857
  } else if (lex_state_beg_p(parser)) {
10746
10858
  type = PM_TOKEN_USTAR;
10859
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10860
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10747
10861
  }
10748
10862
 
10749
10863
  if (lex_state_operator_p(parser)) {
@@ -10860,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
10860
10974
  // If we have quotes, then we're going to go until we find the
10861
10975
  // end quote.
10862
10976
  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10977
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10863
10978
  parser->current.end++;
10864
10979
  }
10865
10980
  }
@@ -10917,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
10917
11032
  LEX(PM_TOKEN_LESS_LESS_EQUAL);
10918
11033
  }
10919
11034
 
11035
+ if (ambiguous_operator_p(parser, space_seen)) {
11036
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11037
+ }
11038
+
10920
11039
  if (lex_state_operator_p(parser)) {
10921
11040
  lex_state_set(parser, PM_LEX_STATE_ARG);
10922
11041
  } else {
@@ -11030,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
11030
11149
  type = PM_TOKEN_UAMPERSAND;
11031
11150
  } else if (lex_state_beg_p(parser)) {
11032
11151
  type = PM_TOKEN_UAMPERSAND;
11152
+ } else if (ambiguous_operator_p(parser, space_seen)) {
11153
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11033
11154
  }
11034
11155
 
11035
11156
  if (lex_state_operator_p(parser)) {
@@ -11104,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
11104
11225
  LEX(PM_TOKEN_UPLUS);
11105
11226
  }
11106
11227
 
11228
+ if (ambiguous_operator_p(parser, space_seen)) {
11229
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11230
+ }
11231
+
11107
11232
  lex_state_set(parser, PM_LEX_STATE_BEG);
11108
11233
  LEX(PM_TOKEN_PLUS);
11109
11234
  }
@@ -11141,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
11141
11266
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11142
11267
  }
11143
11268
 
11269
+ if (ambiguous_operator_p(parser, space_seen)) {
11270
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11271
+ }
11272
+
11144
11273
  lex_state_set(parser, PM_LEX_STATE_BEG);
11145
11274
  LEX(PM_TOKEN_MINUS);
11146
11275
  }
@@ -11239,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
11239
11368
  LEX(PM_TOKEN_REGEXP_BEGIN);
11240
11369
  }
11241
11370
 
11371
+ if (ambiguous_operator_p(parser, space_seen)) {
11372
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11373
+ }
11374
+
11242
11375
  if (lex_state_operator_p(parser)) {
11243
11376
  lex_state_set(parser, PM_LEX_STATE_ARG);
11244
11377
  } else {
@@ -11274,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
11274
11407
  // operator because we don't want to move into the string
11275
11408
  // lex mode unnecessarily.
11276
11409
  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11277
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11410
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11278
11411
  LEX(PM_TOKEN_PERCENT);
11279
11412
  }
11280
11413
 
@@ -11293,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
11293
11426
 
11294
11427
  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11295
11428
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11296
-
11297
- if (parser->current.end < parser->end) {
11298
- LEX(PM_TOKEN_STRING_BEGIN);
11299
- }
11429
+ LEX(PM_TOKEN_STRING_BEGIN);
11300
11430
  }
11301
11431
 
11302
11432
  // Delimiters for %-literals cannot be alphanumeric. We
@@ -11423,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
11423
11553
  }
11424
11554
  }
11425
11555
 
11556
+ if (ambiguous_operator_p(parser, space_seen)) {
11557
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11558
+ }
11559
+
11426
11560
  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11427
11561
  LEX(PM_TOKEN_PERCENT);
11428
11562
  }
@@ -12233,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
12233
12367
 
12234
12368
  // If we are immediately following a newline and we have hit the
12235
12369
  // terminator, then we need to return the ending of the heredoc.
12236
- if (!line_continuation && current_token_starts_line(parser)) {
12370
+ if (current_token_starts_line(parser)) {
12237
12371
  const uint8_t *start = parser->current.start;
12238
- if (start + ident_length <= parser->end) {
12372
+
12373
+ if (!line_continuation && (start + ident_length <= parser->end)) {
12239
12374
  const uint8_t *newline = next_newline(start, parser->end - start);
12240
12375
  const uint8_t *ident_end = newline;
12241
12376
  const uint8_t *terminator_end = newline;
@@ -12391,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
12391
12526
  }
12392
12527
 
12393
12528
  parser->current.end = breakpoint + 1;
12394
-
12395
- if (!was_line_continuation) {
12396
- pm_token_buffer_flush(parser, &token_buffer);
12397
- LEX(PM_TOKEN_STRING_CONTENT);
12398
- }
12529
+ pm_token_buffer_flush(parser, &token_buffer);
12530
+ LEX(PM_TOKEN_STRING_CONTENT);
12399
12531
  }
12400
12532
 
12401
12533
  // Otherwise we hit a newline and it wasn't followed by
@@ -13030,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13030
13162
  return (pm_node_t *) result;
13031
13163
  }
13032
13164
 
13165
+ /**
13166
+ * When an implicit local variable is written to or targeted, it becomes a
13167
+ * regular, named local variable. This function removes it from the list of
13168
+ * implicit parameters when that happens.
13169
+ */
13170
+ static void
13171
+ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13172
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13173
+
13174
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
13175
+ if (implicit_parameters->nodes[index] == node) {
13176
+ // If the node is not the last one in the list, we need to shift the
13177
+ // remaining nodes down to fill the gap. This is extremely unlikely
13178
+ // to happen.
13179
+ if (index != implicit_parameters->size - 1) {
13180
+ memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13181
+ }
13182
+
13183
+ implicit_parameters->size--;
13184
+ break;
13185
+ }
13186
+ }
13187
+ }
13188
+
13033
13189
  /**
13034
13190
  * Convert the given node into a valid target node.
13191
+ *
13192
+ * @param multiple Whether or not this target is part of a larger set of
13193
+ * targets. If it is, then the &. operator is not allowed.
13194
+ * @param splat_parent Whether or not this target is a child of a splat target. If it
13195
+ * is, then fewer patterns are allowed.
13035
13196
  */
13036
13197
  static pm_node_t *
13037
- parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13198
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13038
13199
  switch (PM_NODE_TYPE(target)) {
13039
13200
  case PM_MISSING_NODE:
13040
13201
  return target;
@@ -13080,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13080
13241
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13081
13242
  return target;
13082
13243
  case PM_LOCAL_VARIABLE_READ_NODE: {
13083
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13244
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13245
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13246
+ parse_target_implicit_parameter(parser, target);
13247
+ }
13084
13248
 
13085
13249
  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13086
13250
  uint32_t name = cast->name;
@@ -13092,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13092
13256
 
13093
13257
  return target;
13094
13258
  }
13259
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13260
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13261
+ pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13262
+
13263
+ parse_target_implicit_parameter(parser, target);
13264
+ pm_node_destroy(parser, target);
13265
+
13266
+ return node;
13267
+ }
13095
13268
  case PM_INSTANCE_VARIABLE_READ_NODE:
13096
13269
  assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
13097
13270
  target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13098
13271
  return target;
13099
13272
  case PM_MULTI_TARGET_NODE:
13273
+ if (splat_parent) {
13274
+ // Multi target is not accepted in all positions. If this is one
13275
+ // of them, then we need to add an error.
13276
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13277
+ }
13278
+
13100
13279
  return target;
13101
13280
  case PM_SPLAT_NODE: {
13102
13281
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13103
13282
 
13104
13283
  if (splat->expression != NULL) {
13105
- splat->expression = parse_target(parser, splat->expression, multiple);
13284
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
13106
13285
  }
13107
13286
 
13108
13287
  return (pm_node_t *) splat;
@@ -13172,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13172
13351
  */
13173
13352
  static pm_node_t *
13174
13353
  parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13175
- pm_node_t *result = parse_target(parser, target, multiple);
13354
+ pm_node_t *result = parse_target(parser, target, multiple, false);
13176
13355
 
13177
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
13356
+ // Ensure that the targets are followed by one of: an '=', an 'in' (when
13357
+ // parsing a for index), or a ')' (when inside parentheses).
13178
13358
  if (
13179
13359
  !match1(parser, PM_TOKEN_EQUAL) &&
13180
13360
  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -13244,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13244
13424
  return (pm_node_t *) node;
13245
13425
  }
13246
13426
  case PM_LOCAL_VARIABLE_READ_NODE: {
13247
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13248
13427
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
13249
13428
 
13250
13429
  pm_constant_id_t name = local_read->name;
13430
+ pm_location_t name_loc = target->location;
13431
+
13251
13432
  uint32_t depth = local_read->depth;
13252
- pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13433
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13253
13434
 
13254
- pm_location_t name_loc = target->location;
13435
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13436
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13437
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13438
+ parse_target_implicit_parameter(parser, target);
13439
+ }
13440
+
13441
+ pm_locals_unread(&scope->locals, name);
13255
13442
  pm_node_destroy(parser, target);
13256
13443
 
13257
13444
  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13258
13445
  }
13446
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13447
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13448
+ pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13449
+
13450
+ parse_target_implicit_parameter(parser, target);
13451
+ pm_node_destroy(parser, target);
13452
+
13453
+ return node;
13454
+ }
13259
13455
  case PM_INSTANCE_VARIABLE_READ_NODE: {
13260
13456
  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13261
13457
  pm_node_destroy(parser, target);
@@ -13409,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13409
13605
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13410
13606
 
13411
13607
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13412
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13608
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13413
13609
 
13414
13610
  while (accept1(parser, PM_TOKEN_COMMA)) {
13415
13611
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13425,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13425
13621
 
13426
13622
  if (token_begins_expression_p(parser->current.type)) {
13427
13623
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13428
- name = parse_target(parser, name, true);
13624
+ name = parse_target(parser, name, true, true);
13429
13625
  }
13430
13626
 
13431
13627
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13433,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13433
13629
  has_rest = true;
13434
13630
  } else if (token_begins_expression_p(parser->current.type)) {
13435
13631
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13436
- target = parse_target(parser, target, true);
13632
+ target = parse_target(parser, target, true, false);
13437
13633
 
13438
13634
  pm_multi_target_node_targets_append(parser, result, target);
13439
13635
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13470,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
13470
13666
  */
13471
13667
  static pm_statements_node_t *
13472
13668
  parse_statements(pm_parser_t *parser, pm_context_t context) {
13473
- // First, skip past any optional terminators that might be at the beginning of
13474
- // the statements.
13669
+ // First, skip past any optional terminators that might be at the beginning
13670
+ // of the statements.
13475
13671
  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13476
13672
 
13477
13673
  // If we have a terminator, then we can just return NULL.
@@ -13487,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13487
13683
  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13488
13684
  pm_statements_node_body_append(parser, statements, node);
13489
13685
 
13490
- // If we're recovering from a syntax error, then we need to stop parsing the
13491
- // statements now.
13686
+ // If we're recovering from a syntax error, then we need to stop parsing
13687
+ // the statements now.
13492
13688
  if (parser->recovering) {
13493
- // If this is the level of context where the recovery has happened, then
13494
- // we can mark the parser as done recovering.
13689
+ // If this is the level of context where the recovery has happened,
13690
+ // then we can mark the parser as done recovering.
13495
13691
  if (context_terminator(context, &parser->current)) parser->recovering = false;
13496
13692
  break;
13497
13693
  }
13498
13694
 
13499
- // If we have a terminator, then we will parse all consecutive terminators
13500
- // and then continue parsing the statements list.
13695
+ // If we have a terminator, then we will parse all consecutive
13696
+ // terminators and then continue parsing the statements list.
13501
13697
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13502
- // If we have a terminator, then we will continue parsing the statements
13503
- // list.
13698
+ // If we have a terminator, then we will continue parsing the
13699
+ // statements list.
13504
13700
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13505
13701
  if (context_terminator(context, &parser->current)) break;
13506
13702
 
@@ -13508,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13508
13704
  continue;
13509
13705
  }
13510
13706
 
13511
- // At this point we have a list of statements that are not terminated by a
13512
- // newline or semicolon. At this point we need to check if we're at the end
13513
- // of the statements list. If we are, then we should break out of the loop.
13707
+ // At this point we have a list of statements that are not terminated by
13708
+ // a newline or semicolon. At this point we need to check if we're at
13709
+ // the end of the statements list. If we are, then we should break out
13710
+ // of the loop.
13514
13711
  if (context_terminator(context, &parser->current)) break;
13515
13712
 
13516
13713
  // At this point, we have a syntax error, because the statement was not
13517
13714
  // terminated by a newline or semicolon, and we're not at the end of the
13518
- // statements list. Ideally we should scan forward to determine if we should
13519
- // insert a missing terminator or break out of parsing the statements list
13520
- // at this point.
13715
+ // statements list. Ideally we should scan forward to determine if we
13716
+ // should insert a missing terminator or break out of parsing the
13717
+ // statements list at this point.
13521
13718
  //
13522
- // We don't have that yet, so instead we'll do a more naive approach. If we
13523
- // were unable to parse an expression, then we will skip past this token and
13524
- // continue parsing the statements list. Otherwise we'll add an error and
13525
- // continue parsing the statements list.
13719
+ // We don't have that yet, so instead we'll do a more naive approach. If
13720
+ // we were unable to parse an expression, then we will skip past this
13721
+ // token and continue parsing the statements list. Otherwise we'll add
13722
+ // an error and continue parsing the statements list.
13526
13723
  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13527
13724
  parser_lex(parser);
13528
13725
 
13529
13726
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13530
13727
  if (context_terminator(context, &parser->current)) break;
13531
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
13728
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13532
13729
  // This is an inlined version of accept1 because the error that we
13533
13730
  // want to add has varargs. If this happens again, we should
13534
13731
  // probably extract a helper function.
@@ -13550,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13550
13747
  */
13551
13748
  static void
13552
13749
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13553
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13750
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13554
13751
 
13555
13752
  if (duplicated != NULL) {
13556
13753
  pm_buffer_t buffer = { 0 };
@@ -13576,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13576
13773
  */
13577
13774
  static void
13578
13775
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13579
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13776
+ pm_node_t *previous;
13777
+
13778
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13580
13779
  pm_diagnostic_list_append_format(
13581
13780
  &parser->warning_list,
13582
13781
  node->location.start,
13583
13782
  node->location.end,
13584
13783
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13585
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13784
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13785
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13586
13786
  );
13587
13787
  }
13588
13788
  }
@@ -14036,31 +14236,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14036
14236
  * Check if current parameter follows valid parameters ordering. If not it adds
14037
14237
  * an error to the list without stopping the parsing, otherwise sets the
14038
14238
  * parameters state to the one corresponding to the current parameter.
14239
+ *
14240
+ * It returns true if it was successful, and false otherwise.
14039
14241
  */
14040
- static void
14242
+ static bool
14041
14243
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14042
14244
  pm_parameters_order_t state = parameters_ordering[token->type];
14043
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14245
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
14044
14246
 
14045
14247
  // If we see another ordered argument after a optional argument
14046
14248
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14047
14249
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14048
14250
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14049
- return;
14251
+ return true;
14050
14252
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14051
- return;
14253
+ return true;
14052
14254
  }
14053
14255
 
14054
14256
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14055
14257
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14056
- }
14057
-
14058
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14258
+ return false;
14259
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14260
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14261
+ return false;
14262
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14059
14263
  // We know what transition we failed on, so we can provide a better error here.
14060
14264
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14061
- } else if (state < *current) {
14062
- *current = state;
14265
+ return false;
14063
14266
  }
14267
+
14268
+ if (state < *current) *current = state;
14269
+ return true;
14064
14270
  }
14065
14271
 
14066
14272
  /**
@@ -14129,27 +14335,22 @@ parse_parameters(
14129
14335
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14130
14336
  }
14131
14337
 
14132
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14133
- update_parameter_state(parser, &parser->current, &order);
14134
- parser_lex(parser);
14338
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14339
+ parser_lex(parser);
14135
14340
 
14136
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14341
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14342
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14137
14343
 
14138
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14139
- if (params->keyword_rest != NULL) {
14140
- // If we already have a keyword rest parameter, then we replace it with the
14141
- // forwarding parameter and move the keyword rest parameter to the posts list.
14142
- pm_node_t *keyword_rest = params->keyword_rest;
14143
- pm_parameters_node_posts_append(params, keyword_rest);
14144
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14145
- params->keyword_rest = NULL;
14146
- }
14147
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
14148
- } else {
14149
- update_parameter_state(parser, &parser->current, &order);
14150
- parser_lex(parser);
14344
+ if (params->keyword_rest != NULL) {
14345
+ // If we already have a keyword rest parameter, then we replace it with the
14346
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14347
+ pm_node_t *keyword_rest = params->keyword_rest;
14348
+ pm_parameters_node_posts_append(params, keyword_rest);
14349
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14350
+ params->keyword_rest = NULL;
14151
14351
  }
14152
14352
 
14353
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14153
14354
  break;
14154
14355
  }
14155
14356
  case PM_TOKEN_CLASS_VARIABLE:
@@ -14193,7 +14394,7 @@ parse_parameters(
14193
14394
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14194
14395
 
14195
14396
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14196
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14397
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14197
14398
 
14198
14399
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
14199
14400
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
@@ -14206,7 +14407,7 @@ parse_parameters(
14206
14407
  // If the value of the parameter increased the number of
14207
14408
  // reads of that parameter, then we need to warn that we
14208
14409
  // have a circular definition.
14209
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14410
+ if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14210
14411
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14211
14412
  }
14212
14413
 
@@ -14244,6 +14445,12 @@ parse_parameters(
14244
14445
  pm_token_t local = name;
14245
14446
  local.end -= 1;
14246
14447
 
14448
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14449
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14450
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14451
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14452
+ }
14453
+
14247
14454
  bool repeated = pm_parser_parameter_name_check(parser, &local);
14248
14455
  pm_parser_local_add_token(parser, &local, 1);
14249
14456
 
@@ -14279,10 +14486,10 @@ parse_parameters(
14279
14486
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14280
14487
 
14281
14488
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14282
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14489
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14283
14490
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
14284
14491
 
14285
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14492
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14286
14493
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14287
14494
  }
14288
14495
 
@@ -14454,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14454
14661
  pm_rescue_node_operator_set(rescue, &parser->previous);
14455
14662
 
14456
14663
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14457
- reference = parse_target(parser, reference, false);
14664
+ reference = parse_target(parser, reference, false, false);
14458
14665
 
14459
14666
  pm_rescue_node_reference_set(rescue, reference);
14460
14667
  break;
@@ -14484,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14484
14691
  pm_rescue_node_operator_set(rescue, &parser->previous);
14485
14692
 
14486
14693
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14487
- reference = parse_target(parser, reference, false);
14694
+ reference = parse_target(parser, reference, false, false);
14488
14695
 
14489
14696
  pm_rescue_node_reference_set(rescue, reference);
14490
14697
  break;
@@ -14689,6 +14896,28 @@ parse_block_parameters(
14689
14896
  return block_parameters;
14690
14897
  }
14691
14898
 
14899
+ /**
14900
+ * Return true if any of the visible scopes to the current context are using
14901
+ * numbered parameters.
14902
+ */
14903
+ static bool
14904
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14905
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14906
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14907
+ }
14908
+
14909
+ return false;
14910
+ }
14911
+
14912
+ /**
14913
+ * These are the names of the various numbered parameters. We have them here so
14914
+ * that when we insert them into the constant pool we can use a constant string
14915
+ * and not have to allocate.
14916
+ */
14917
+ static const char * const pm_numbered_parameter_names[] = {
14918
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
14919
+ };
14920
+
14692
14921
  /**
14693
14922
  * Return the node that should be used in the parameters field of a block-like
14694
14923
  * (block or lambda) node, depending on the kind of parameters that were
@@ -14696,31 +14925,79 @@ parse_block_parameters(
14696
14925
  */
14697
14926
  static pm_node_t *
14698
14927
  parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14699
- uint8_t masked = parser->current_scope->parameters & PM_SCOPE_PARAMETERS_TYPE_MASK;
14928
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14929
+
14930
+ // If we have ordinary parameters, then we will return them as the set of
14931
+ // parameters.
14932
+ if (parameters != NULL) {
14933
+ // If we also have implicit parameters, then this is an error.
14934
+ if (implicit_parameters->size > 0) {
14935
+ pm_node_t *node = implicit_parameters->nodes[0];
14936
+
14937
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14938
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14939
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14940
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14941
+ } else {
14942
+ assert(false && "unreachable");
14943
+ }
14944
+ }
14700
14945
 
14701
- if (masked == PM_SCOPE_PARAMETERS_NONE) {
14702
- assert(parameters == NULL);
14703
- return NULL;
14704
- } else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
14705
- assert(parameters != NULL);
14706
14946
  return parameters;
14707
- } else if (masked == PM_SCOPE_PARAMETERS_NUMBERED) {
14708
- assert(parameters == NULL);
14947
+ }
14709
14948
 
14710
- int8_t maximum = parser->current_scope->numbered_parameters;
14711
- if (maximum > 0) {
14712
- const pm_location_t location = { .start = opening->start, .end = closing->end };
14713
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, (uint8_t) maximum);
14949
+ // If we don't have any implicit parameters, then the set of parameters is
14950
+ // NULL.
14951
+ if (implicit_parameters->size == 0) {
14952
+ return NULL;
14953
+ }
14954
+
14955
+ // If we don't have ordinary parameters, then we now must validate our set
14956
+ // of implicit parameters. We can only have numbered parameters or it, but
14957
+ // they cannot be mixed.
14958
+ uint8_t numbered_parameter = 0;
14959
+ bool it_parameter = false;
14960
+
14961
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
14962
+ pm_node_t *node = implicit_parameters->nodes[index];
14963
+
14964
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14965
+ if (it_parameter) {
14966
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14967
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
14968
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14969
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14970
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14971
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14972
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14973
+ } else {
14974
+ assert(false && "unreachable");
14975
+ }
14976
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14977
+ if (numbered_parameter > 0) {
14978
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14979
+ } else {
14980
+ it_parameter = true;
14981
+ }
14714
14982
  }
14983
+ }
14715
14984
 
14716
- return NULL;
14717
- } else if (masked == PM_SCOPE_PARAMETERS_IT) {
14718
- assert(parameters == NULL);
14985
+ if (numbered_parameter > 0) {
14986
+ // Go through the parent scopes and mark them as being disallowed from
14987
+ // using numbered parameters because this inner scope is using them.
14988
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14989
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14990
+ }
14991
+
14992
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
14993
+ return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
14994
+ }
14995
+
14996
+ if (it_parameter) {
14719
14997
  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
14720
- } else {
14721
- assert(false && "unreachable");
14722
- return NULL;
14723
14998
  }
14999
+
15000
+ return NULL;
14724
15001
  }
14725
15002
 
14726
15003
  /**
@@ -14737,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
14737
15014
  pm_block_parameters_node_t *block_parameters = NULL;
14738
15015
 
14739
15016
  if (accept1(parser, PM_TOKEN_PIPE)) {
14740
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
14741
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
14742
-
14743
15017
  pm_token_t block_parameters_opening = parser->previous;
14744
15018
  if (match1(parser, PM_TOKEN_PIPE)) {
14745
15019
  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
@@ -14808,7 +15082,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14808
15082
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14809
15083
  } else {
14810
15084
  pm_accepts_block_stack_push(parser, true);
14811
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
15085
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14812
15086
 
14813
15087
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14814
15088
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14826,7 +15100,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14826
15100
  // If we get here, then the subsequent token cannot be used as an infix
14827
15101
  // operator. In this case we assume the subsequent token is part of an
14828
15102
  // argument to this method call.
14829
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
15103
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14830
15104
 
14831
15105
  // If we have done with the arguments and still not consumed the comma,
14832
15106
  // then we have a trailing comma where we need to check whether it is
@@ -14857,11 +15131,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14857
15131
  if (arguments->block == NULL && !arguments->has_forwarding) {
14858
15132
  arguments->block = (pm_node_t *) block;
14859
15133
  } else {
14860
- if (arguments->has_forwarding) {
14861
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14862
- } else {
14863
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14864
- }
15134
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15135
+
14865
15136
  if (arguments->block != NULL) {
14866
15137
  if (arguments->arguments == NULL) {
14867
15138
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15240,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
15240
15511
  #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15241
15512
  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15242
15513
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15243
- case PM_NUMBERED_REFERENCE_READ_NODE
15514
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15244
15515
 
15245
15516
  // Assert here that the flags are the same so that we can safely switch the type
15246
15517
  // of the node without having to move the flags.
@@ -15298,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
15298
15569
  // "aaa #{bbb} #@ccc ddd"
15299
15570
  // ^^^^^^
15300
15571
  case PM_TOKEN_EMBEXPR_BEGIN: {
15572
+ // Ruby disallows seeing encoding around interpolation in strings,
15573
+ // even though it is known at parse time.
15574
+ parser->explicit_encoding = NULL;
15575
+
15301
15576
  pm_lex_state_t state = parser->lex_state;
15302
15577
  int brace_nesting = parser->brace_nesting;
15303
15578
 
@@ -15320,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
15320
15595
  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15321
15596
  pm_token_t closing = parser->previous;
15322
15597
 
15598
+ // If this set of embedded statements only contains a single
15599
+ // statement, then Ruby does not consider it as a possible statement
15600
+ // that could emit a line event.
15601
+ if (statements != NULL && statements->body.size == 1) {
15602
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15603
+ }
15604
+
15323
15605
  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
15324
15606
  }
15325
15607
 
@@ -15330,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
15330
15612
  // "aaa #{bbb} #@ccc ddd"
15331
15613
  // ^^^^^
15332
15614
  case PM_TOKEN_EMBVAR: {
15615
+ // Ruby disallows seeing encoding around interpolation in strings,
15616
+ // even though it is known at parse time.
15617
+ parser->explicit_encoding = NULL;
15618
+
15333
15619
  lex_state_set(parser, PM_LEX_STATE_BEG);
15334
15620
  parser_lex(parser);
15335
15621
 
@@ -15644,75 +15930,44 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15644
15930
  }
15645
15931
  }
15646
15932
 
15647
- /**
15648
- * Return true if any of the visible scopes to the current context are using
15649
- * numbered parameters.
15650
- */
15651
- static bool
15652
- outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15653
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15654
- if (scope->numbered_parameters > 0) return true;
15655
- }
15656
-
15657
- return false;
15658
- }
15659
-
15660
- /**
15661
- * These are the names of the various numbered parameters. We have them here so
15662
- * that when we insert them into the constant pool we can use a constant string
15663
- * and not have to allocate.
15664
- */
15665
- static const char * const pm_numbered_parameter_names[] = {
15666
- "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
15667
- };
15668
-
15669
15933
  /**
15670
15934
  * Parse an identifier into either a local variable read. If the local variable
15671
15935
  * is not found, it returns NULL instead.
15672
15936
  */
15673
- static pm_local_variable_read_node_t *
15937
+ static pm_node_t *
15674
15938
  parse_variable(pm_parser_t *parser) {
15939
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15675
15940
  int depth;
15676
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
15677
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
15941
+
15942
+ if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
15943
+ return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
15678
15944
  }
15679
15945
 
15680
15946
  pm_scope_t *current_scope = parser->current_scope;
15681
- if (!current_scope->closed && current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15682
- // Now that we know we have a numbered parameter, we need to check
15683
- // if it's allowed in this context. If it is, then we will create a
15684
- // local variable read. If it's not, then we'll create a normal call
15685
- // node but add an error.
15686
- if (current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
15687
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15688
- } else if (current_scope->parameters & PM_SCOPE_PARAMETERS_IT) {
15689
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_IT);
15690
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
15691
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
15692
- } else {
15693
- // Indicate that this scope is using numbered params so that child
15694
- // scopes cannot. We subtract the value for the character '0' to get
15695
- // the actual integer value of the number (only _1 through _9 are
15696
- // valid).
15697
- int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
15698
- current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
15699
-
15700
- if (numbered_parameters > current_scope->numbered_parameters) {
15701
- current_scope->numbered_parameters = numbered_parameters;
15947
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15948
+ if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15949
+ // When you use a numbered parameter, it implies the existence of
15950
+ // all of the locals that exist before it. For example, referencing
15951
+ // _2 means that _1 must exist. Therefore here we loop through all
15952
+ // of the possibilities and add them into the constant pool.
15953
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15954
+ for (uint8_t number = 1; number <= maximum; number++) {
15955
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15702
15956
  }
15703
15957
 
15704
- // When you use a numbered parameter, it implies the existence
15705
- // of all of the locals that exist before it. For example,
15706
- // referencing _2 means that _1 must exist. Therefore here we
15707
- // loop through all of the possibilities and add them into the
15708
- // constant pool.
15709
- for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
15710
- pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
15958
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
15959
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15711
15960
  }
15712
15961
 
15713
- // Finally we can create the local variable read node.
15714
- pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
15715
- return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15962
+ pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15963
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15964
+
15965
+ return node;
15966
+ } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15967
+ pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
15968
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15969
+
15970
+ return node;
15716
15971
  }
15717
15972
  }
15718
15973
 
@@ -15727,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
15727
15982
  pm_node_flags_t flags = 0;
15728
15983
 
15729
15984
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15730
- pm_local_variable_read_node_t *node = parse_variable(parser);
15731
- if (node != NULL) return (pm_node_t *) node;
15985
+ pm_node_t *node = parse_variable(parser);
15986
+ if (node != NULL) return node;
15732
15987
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15733
15988
  }
15734
15989
 
@@ -15846,127 +16101,355 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15846
16101
  nodes->size = write_index;
15847
16102
  }
15848
16103
 
15849
- static pm_node_t *
15850
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15851
-
15852
16104
  /**
15853
- * Add the newly created local to the list of captures for this pattern matching
15854
- * expression. If it is duplicated from a previous local, then we'll need to add
15855
- * an error to the parser.
16105
+ * Return a string content token at a particular location that is empty.
15856
16106
  */
15857
- static void
15858
- parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
15859
- // Skip this capture if it starts with an underscore.
15860
- if (*location->start == '_') return;
15861
-
15862
- if (pm_constant_id_list_includes(captures, capture)) {
15863
- pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
15864
- } else {
15865
- pm_constant_id_list_append(captures, capture);
15866
- }
16107
+ static pm_token_t
16108
+ parse_strings_empty_content(const uint8_t *location) {
16109
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15867
16110
  }
15868
16111
 
15869
16112
  /**
15870
- * Accept any number of constants joined by :: delimiters.
16113
+ * Parse a set of strings that could be concatenated together.
15871
16114
  */
15872
- static pm_node_t *
15873
- parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
15874
- // Now, if there are any :: operators that follow, parse them as constant
15875
- // path nodes.
15876
- while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15877
- pm_token_t delimiter = parser->previous;
15878
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15879
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15880
- }
15881
-
15882
- // If there is a [ or ( that follows, then this is part of a larger pattern
15883
- // expression. We'll parse the inner pattern here, then modify the returned
15884
- // inner pattern with our constant path attached.
15885
- if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
15886
- return node;
15887
- }
16115
+ static inline pm_node_t *
16116
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16117
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
15888
16118
 
15889
- pm_token_t opening;
15890
- pm_token_t closing;
15891
- pm_node_t *inner = NULL;
16119
+ bool concating = false;
16120
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
15892
16121
 
15893
- if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
15894
- opening = parser->previous;
15895
- accept1(parser, PM_TOKEN_NEWLINE);
16122
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16123
+ pm_node_t *node = NULL;
15896
16124
 
15897
- if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15898
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15899
- accept1(parser, PM_TOKEN_NEWLINE);
15900
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15901
- }
16125
+ // Here we have found a string literal. We'll parse it and add it to
16126
+ // the list of strings.
16127
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16128
+ assert(lex_mode->mode == PM_LEX_STRING);
16129
+ bool lex_interpolation = lex_mode->as.string.interpolation;
15902
16130
 
15903
- closing = parser->previous;
15904
- } else {
16131
+ pm_token_t opening = parser->current;
15905
16132
  parser_lex(parser);
15906
- opening = parser->previous;
15907
- accept1(parser, PM_TOKEN_NEWLINE);
15908
-
15909
- if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15910
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15911
- accept1(parser, PM_TOKEN_NEWLINE);
15912
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15913
- }
15914
-
15915
- closing = parser->previous;
15916
- }
15917
-
15918
- if (!inner) {
15919
- // If there was no inner pattern, then we have something like Foo() or
15920
- // Foo[]. In that case we'll create an array pattern with no requireds.
15921
- return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
15922
- }
15923
16133
 
15924
- // Now that we have the inner pattern, check to see if it's an array, find,
15925
- // or hash pattern. If it is, then we'll attach our constant path to it if
15926
- // it doesn't already have a constant. If it's not one of those node types
15927
- // or it does have a constant, then we'll create an array pattern.
15928
- switch (PM_NODE_TYPE(inner)) {
15929
- case PM_ARRAY_PATTERN_NODE: {
15930
- pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16134
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16135
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16136
+ // If we get here, then we have an end immediately after a
16137
+ // start. In that case we'll create an empty content token and
16138
+ // return an uninterpolated string.
16139
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16140
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15931
16141
 
15932
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15933
- pattern_node->base.location.start = node->location.start;
15934
- pattern_node->base.location.end = closing.end;
16142
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
16143
+ node = (pm_node_t *) string;
16144
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16145
+ // If we get here, then we have an end of a label immediately
16146
+ // after a start. In that case we'll create an empty symbol
16147
+ // node.
16148
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16149
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
15935
16150
 
15936
- pattern_node->constant = node;
15937
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15938
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16151
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16152
+ node = (pm_node_t *) symbol;
16153
+ } else if (!lex_interpolation) {
16154
+ // If we don't accept interpolation then we expect the string to
16155
+ // start with a single string content node.
16156
+ pm_string_t unescaped;
16157
+ pm_token_t content;
15939
16158
 
15940
- return (pm_node_t *) pattern_node;
16159
+ if (match1(parser, PM_TOKEN_EOF)) {
16160
+ unescaped = PM_STRING_EMPTY;
16161
+ content = not_provided(parser);
16162
+ } else {
16163
+ unescaped = parser->current_string;
16164
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16165
+ content = parser->previous;
15941
16166
  }
15942
16167
 
15943
- break;
15944
- }
15945
- case PM_FIND_PATTERN_NODE: {
15946
- pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
15947
-
15948
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15949
- pattern_node->base.location.start = node->location.start;
15950
- pattern_node->base.location.end = closing.end;
15951
-
15952
- pattern_node->constant = node;
15953
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15954
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16168
+ // It is unfortunately possible to have multiple string content
16169
+ // nodes in a row in the case that there's heredoc content in
16170
+ // the middle of the string, like this cursed example:
16171
+ //
16172
+ // <<-END+'b
16173
+ // a
16174
+ // END
16175
+ // c'+'d'
16176
+ //
16177
+ // In that case we need to switch to an interpolated string to
16178
+ // be able to contain all of the parts.
16179
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16180
+ pm_node_list_t parts = { 0 };
15955
16181
 
15956
- return (pm_node_t *) pattern_node;
15957
- }
16182
+ pm_token_t delimiters = not_provided(parser);
16183
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16184
+ pm_node_list_append(&parts, part);
15958
16185
 
15959
- break;
15960
- }
15961
- case PM_HASH_PATTERN_NODE: {
15962
- pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16186
+ do {
16187
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16188
+ pm_node_list_append(&parts, part);
16189
+ parser_lex(parser);
16190
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
15963
16191
 
15964
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15965
- pattern_node->base.location.start = node->location.start;
15966
- pattern_node->base.location.end = closing.end;
16192
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16193
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
15967
16194
 
15968
- pattern_node->constant = node;
15969
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16195
+ pm_node_list_free(&parts);
16196
+ } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16197
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16198
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16199
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16200
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16201
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16202
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16203
+ } else {
16204
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16205
+ parser->previous.start = parser->previous.end;
16206
+ parser->previous.type = PM_TOKEN_MISSING;
16207
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16208
+ }
16209
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16210
+ // In this case we've hit string content so we know the string
16211
+ // at least has something in it. We'll need to check if the
16212
+ // following token is the end (in which case we can return a
16213
+ // plain string) or if it's not then it has interpolation.
16214
+ pm_token_t content = parser->current;
16215
+ pm_string_t unescaped = parser->current_string;
16216
+ parser_lex(parser);
16217
+
16218
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16219
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16220
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
16221
+
16222
+ // Kind of odd behavior, but basically if we have an
16223
+ // unterminated string and it ends in a newline, we back up one
16224
+ // character so that the error message is on the last line of
16225
+ // content in the string.
16226
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
16227
+ const uint8_t *location = parser->previous.end;
16228
+ if (location > parser->start && location[-1] == '\n') location--;
16229
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16230
+
16231
+ parser->previous.start = parser->previous.end;
16232
+ parser->previous.type = PM_TOKEN_MISSING;
16233
+ }
16234
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16235
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16236
+ } else {
16237
+ // If we get here, then we have interpolation so we'll need
16238
+ // to create a string or symbol node with interpolation.
16239
+ pm_node_list_t parts = { 0 };
16240
+ pm_token_t string_opening = not_provided(parser);
16241
+ pm_token_t string_closing = not_provided(parser);
16242
+
16243
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16244
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
16245
+ pm_node_list_append(&parts, part);
16246
+
16247
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16248
+ if ((part = parse_string_part(parser)) != NULL) {
16249
+ pm_node_list_append(&parts, part);
16250
+ }
16251
+ }
16252
+
16253
+ if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16254
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16255
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16256
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16257
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16258
+ } else {
16259
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16260
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16261
+ }
16262
+
16263
+ pm_node_list_free(&parts);
16264
+ }
16265
+ } else {
16266
+ // If we get here, then the first part of the string is not plain
16267
+ // string content, in which case we need to parse the string as an
16268
+ // interpolated string.
16269
+ pm_node_list_t parts = { 0 };
16270
+ pm_node_t *part;
16271
+
16272
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16273
+ if ((part = parse_string_part(parser)) != NULL) {
16274
+ pm_node_list_append(&parts, part);
16275
+ }
16276
+ }
16277
+
16278
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
16279
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16280
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16281
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16282
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16283
+ } else {
16284
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16285
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16286
+ }
16287
+
16288
+ pm_node_list_free(&parts);
16289
+ }
16290
+
16291
+ if (current == NULL) {
16292
+ // If the node we just parsed is a symbol node, then we can't
16293
+ // concatenate it with anything else, so we can now return that
16294
+ // node.
16295
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16296
+ return node;
16297
+ }
16298
+
16299
+ // If we don't already have a node, then it's fine and we can just
16300
+ // set the result to be the node we just parsed.
16301
+ current = node;
16302
+ } else {
16303
+ // Otherwise we need to check the type of the node we just parsed.
16304
+ // If it cannot be concatenated with the previous node, then we'll
16305
+ // need to add a syntax error.
16306
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16307
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16308
+ }
16309
+
16310
+ // If we haven't already created our container for concatenation,
16311
+ // we'll do that now.
16312
+ if (!concating) {
16313
+ concating = true;
16314
+ pm_token_t bounds = not_provided(parser);
16315
+
16316
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16317
+ pm_interpolated_string_node_append(container, current);
16318
+ current = (pm_node_t *) container;
16319
+ }
16320
+
16321
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16322
+ }
16323
+ }
16324
+
16325
+ return current;
16326
+ }
16327
+
16328
+ #define PM_PARSE_PATTERN_SINGLE 0
16329
+ #define PM_PARSE_PATTERN_TOP 1
16330
+ #define PM_PARSE_PATTERN_MULTI 2
16331
+
16332
+ static pm_node_t *
16333
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
16334
+
16335
+ /**
16336
+ * Add the newly created local to the list of captures for this pattern matching
16337
+ * expression. If it is duplicated from a previous local, then we'll need to add
16338
+ * an error to the parser.
16339
+ */
16340
+ static void
16341
+ parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16342
+ // Skip this capture if it starts with an underscore.
16343
+ if (*location->start == '_') return;
16344
+
16345
+ if (pm_constant_id_list_includes(captures, capture)) {
16346
+ pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16347
+ } else {
16348
+ pm_constant_id_list_append(captures, capture);
16349
+ }
16350
+ }
16351
+
16352
+ /**
16353
+ * Accept any number of constants joined by :: delimiters.
16354
+ */
16355
+ static pm_node_t *
16356
+ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
16357
+ // Now, if there are any :: operators that follow, parse them as constant
16358
+ // path nodes.
16359
+ while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16360
+ pm_token_t delimiter = parser->previous;
16361
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16362
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16363
+ }
16364
+
16365
+ // If there is a [ or ( that follows, then this is part of a larger pattern
16366
+ // expression. We'll parse the inner pattern here, then modify the returned
16367
+ // inner pattern with our constant path attached.
16368
+ if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16369
+ return node;
16370
+ }
16371
+
16372
+ pm_token_t opening;
16373
+ pm_token_t closing;
16374
+ pm_node_t *inner = NULL;
16375
+
16376
+ if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16377
+ opening = parser->previous;
16378
+ accept1(parser, PM_TOKEN_NEWLINE);
16379
+
16380
+ if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16381
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16382
+ accept1(parser, PM_TOKEN_NEWLINE);
16383
+ expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16384
+ }
16385
+
16386
+ closing = parser->previous;
16387
+ } else {
16388
+ parser_lex(parser);
16389
+ opening = parser->previous;
16390
+ accept1(parser, PM_TOKEN_NEWLINE);
16391
+
16392
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16393
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16394
+ accept1(parser, PM_TOKEN_NEWLINE);
16395
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16396
+ }
16397
+
16398
+ closing = parser->previous;
16399
+ }
16400
+
16401
+ if (!inner) {
16402
+ // If there was no inner pattern, then we have something like Foo() or
16403
+ // Foo[]. In that case we'll create an array pattern with no requireds.
16404
+ return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16405
+ }
16406
+
16407
+ // Now that we have the inner pattern, check to see if it's an array, find,
16408
+ // or hash pattern. If it is, then we'll attach our constant path to it if
16409
+ // it doesn't already have a constant. If it's not one of those node types
16410
+ // or it does have a constant, then we'll create an array pattern.
16411
+ switch (PM_NODE_TYPE(inner)) {
16412
+ case PM_ARRAY_PATTERN_NODE: {
16413
+ pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16414
+
16415
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16416
+ pattern_node->base.location.start = node->location.start;
16417
+ pattern_node->base.location.end = closing.end;
16418
+
16419
+ pattern_node->constant = node;
16420
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16421
+ pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16422
+
16423
+ return (pm_node_t *) pattern_node;
16424
+ }
16425
+
16426
+ break;
16427
+ }
16428
+ case PM_FIND_PATTERN_NODE: {
16429
+ pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16430
+
16431
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16432
+ pattern_node->base.location.start = node->location.start;
16433
+ pattern_node->base.location.end = closing.end;
16434
+
16435
+ pattern_node->constant = node;
16436
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16437
+ pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16438
+
16439
+ return (pm_node_t *) pattern_node;
16440
+ }
16441
+
16442
+ break;
16443
+ }
16444
+ case PM_HASH_PATTERN_NODE: {
16445
+ pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16446
+
16447
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16448
+ pattern_node->base.location.start = node->location.start;
16449
+ pattern_node->base.location.end = closing.end;
16450
+
16451
+ pattern_node->constant = node;
16452
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15970
16453
  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
15971
16454
 
15972
16455
  return (pm_node_t *) pattern_node;
@@ -16055,6 +16538,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16055
16538
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16056
16539
  }
16057
16540
 
16541
+ /**
16542
+ * Check that the slice of the source given by the bounds parameters constitutes
16543
+ * a valid local variable name.
16544
+ */
16545
+ static bool
16546
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16547
+ ptrdiff_t length = end - start;
16548
+ if (length == 0) return false;
16549
+
16550
+ // First ensure that it starts with a valid identifier starting character.
16551
+ size_t width = char_is_identifier_start(parser, start);
16552
+ if (width == 0) return false;
16553
+
16554
+ // Next, ensure that it's not an uppercase character.
16555
+ if (parser->encoding_changed) {
16556
+ if (parser->encoding->isupper_char(start, length)) return false;
16557
+ } else {
16558
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16559
+ }
16560
+
16561
+ // Next, iterate through all of the bytes of the string to ensure that they
16562
+ // are all valid identifier characters.
16563
+ const uint8_t *cursor = start + width;
16564
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16565
+ return cursor == end;
16566
+ }
16567
+
16058
16568
  /**
16059
16569
  * Create an implicit node for the value of a hash pattern that has omitted the
16060
16570
  * value. This will use an implicit local variable target.
@@ -16062,14 +16572,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16062
16572
  static pm_node_t *
16063
16573
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16064
16574
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16065
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16066
16575
 
16576
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16067
16577
  int depth = -1;
16068
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
16069
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16070
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16071
- } else {
16578
+
16579
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16072
16580
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16581
+ } else {
16582
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16583
+
16584
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16585
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16586
+ }
16073
16587
  }
16074
16588
 
16075
16589
  if (depth == -1) {
@@ -16093,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
16093
16607
  */
16094
16608
  static void
16095
16609
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16096
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
16610
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16097
16611
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16098
16612
  }
16099
16613
  }
@@ -16124,7 +16638,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16124
16638
  } else {
16125
16639
  // Here we have a value for the first assoc in the list, so
16126
16640
  // we will parse it now.
16127
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16641
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16128
16642
  }
16129
16643
 
16130
16644
  pm_token_t operator = not_provided(parser);
@@ -16139,7 +16653,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16139
16653
  // If we get anything else, then this is an error. For this we'll
16140
16654
  // create a missing node for the value and create an assoc node for
16141
16655
  // the first node in the list.
16142
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16656
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16657
+ pm_parser_err_node(parser, first_node, diag_id);
16143
16658
 
16144
16659
  pm_token_t operator = not_provided(parser);
16145
16660
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -16167,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16167
16682
  pm_node_list_append(&assocs, assoc);
16168
16683
  }
16169
16684
  } else {
16170
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16171
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16685
+ pm_node_t *key;
16686
+
16687
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16688
+ key = parse_strings(parser, NULL);
16689
+
16690
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16691
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16692
+ } else if (!pm_symbol_node_label_p(key)) {
16693
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16694
+ }
16695
+ } else {
16696
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16697
+ key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16698
+ }
16172
16699
 
16173
16700
  parse_pattern_hash_key(parser, &keys, key);
16174
16701
  pm_node_t *value = NULL;
@@ -16176,7 +16703,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16176
16703
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16177
16704
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16178
16705
  } else {
16179
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16706
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16180
16707
  }
16181
16708
 
16182
16709
  pm_token_t operator = not_provided(parser);
@@ -16233,7 +16760,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16233
16760
 
16234
16761
  // Otherwise, we'll parse the inner pattern, then deal with it depending
16235
16762
  // on the type it returns.
16236
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16763
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16237
16764
 
16238
16765
  accept1(parser, PM_TOKEN_NEWLINE);
16239
16766
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -16300,11 +16827,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16300
16827
  first_node = parse_pattern_keyword_rest(parser, captures);
16301
16828
  break;
16302
16829
  case PM_TOKEN_STRING_BEGIN:
16303
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16830
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
16304
16831
  break;
16305
16832
  default: {
16833
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16306
16834
  parser_lex(parser);
16307
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
16308
16835
 
16309
16836
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16310
16837
  break;
@@ -16380,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16380
16907
  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
16381
16908
 
16382
16909
  if (variable == NULL) {
16383
- if (
16384
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
16385
- !parser->current_scope->closed &&
16386
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
16387
- pm_token_is_it(parser->previous.start, parser->previous.end)
16388
- ) {
16389
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
16390
- if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
16391
- variable = (pm_node_t *) read;
16392
- } else {
16393
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16394
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16395
- }
16910
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16911
+ variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16396
16912
  }
16397
16913
 
16398
16914
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
@@ -16506,7 +17022,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16506
17022
  pm_token_t opening = parser->current;
16507
17023
  parser_lex(parser);
16508
17024
 
16509
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
17025
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16510
17026
  accept1(parser, PM_TOKEN_NEWLINE);
16511
17027
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16512
17028
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16565,7 +17081,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16565
17081
  * Parse a pattern matching expression.
16566
17082
  */
16567
17083
  static pm_node_t *
16568
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
17084
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16569
17085
  pm_node_t *node = NULL;
16570
17086
 
16571
17087
  bool leading_rest = false;
@@ -16575,14 +17091,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16575
17091
  case PM_TOKEN_LABEL: {
16576
17092
  parser_lex(parser);
16577
17093
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16578
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
17094
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
17095
+
17096
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
17097
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17098
+ }
17099
+
17100
+ return node;
16579
17101
  }
16580
17102
  case PM_TOKEN_USTAR_STAR: {
16581
17103
  node = parse_pattern_keyword_rest(parser, captures);
16582
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
17104
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
17105
+
17106
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
17107
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17108
+ }
17109
+
17110
+ return node;
16583
17111
  }
16584
17112
  case PM_TOKEN_USTAR: {
16585
- if (top_pattern) {
17113
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16586
17114
  parser_lex(parser);
16587
17115
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16588
17116
  leading_rest = true;
@@ -16601,7 +17129,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16601
17129
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16602
17130
  }
16603
17131
 
16604
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
17132
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16605
17133
  // If we have a comma, then we are now parsing either an array pattern or a
16606
17134
  // find pattern. We need to parse all of the patterns, put them into a big
16607
17135
  // list, and then determine which type of node we have.
@@ -16642,262 +17170,53 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16642
17170
  if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
16643
17171
  node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
16644
17172
  } else {
16645
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
16646
- }
16647
-
16648
- xfree(nodes.nodes);
16649
- } else if (leading_rest) {
16650
- // Otherwise, if we parsed a single splat pattern, then we know we have an
16651
- // array pattern, so we can go ahead and create that node.
16652
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
16653
- }
16654
-
16655
- return node;
16656
- }
16657
-
16658
- /**
16659
- * Incorporate a negative sign into a numeric node by subtracting 1 character
16660
- * from its start bounds. If it's a compound node, then we will recursively
16661
- * apply this function to its value.
16662
- */
16663
- static inline void
16664
- parse_negative_numeric(pm_node_t *node) {
16665
- switch (PM_NODE_TYPE(node)) {
16666
- case PM_INTEGER_NODE: {
16667
- pm_integer_node_t *cast = (pm_integer_node_t *) node;
16668
- cast->base.location.start--;
16669
- cast->value.negative = true;
16670
- break;
16671
- }
16672
- case PM_FLOAT_NODE: {
16673
- pm_float_node_t *cast = (pm_float_node_t *) node;
16674
- cast->base.location.start--;
16675
- cast->value = -cast->value;
16676
- break;
16677
- }
16678
- case PM_RATIONAL_NODE:
16679
- node->location.start--;
16680
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
16681
- break;
16682
- case PM_IMAGINARY_NODE:
16683
- node->location.start--;
16684
- parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
16685
- break;
16686
- default:
16687
- assert(false && "unreachable");
16688
- break;
16689
- }
16690
- }
16691
-
16692
- /**
16693
- * Return a string content token at a particular location that is empty.
16694
- */
16695
- static pm_token_t
16696
- parse_strings_empty_content(const uint8_t *location) {
16697
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16698
- }
16699
-
16700
- /**
16701
- * Parse a set of strings that could be concatenated together.
16702
- */
16703
- static inline pm_node_t *
16704
- parse_strings(pm_parser_t *parser, pm_node_t *current) {
16705
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16706
-
16707
- bool concating = false;
16708
- bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16709
-
16710
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16711
- pm_node_t *node = NULL;
16712
-
16713
- // Here we have found a string literal. We'll parse it and add it to
16714
- // the list of strings.
16715
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16716
- assert(lex_mode->mode == PM_LEX_STRING);
16717
- bool lex_interpolation = lex_mode->as.string.interpolation;
16718
-
16719
- pm_token_t opening = parser->current;
16720
- parser_lex(parser);
16721
-
16722
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16723
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16724
- // If we get here, then we have an end immediately after a
16725
- // start. In that case we'll create an empty content token and
16726
- // return an uninterpolated string.
16727
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16728
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16729
-
16730
- pm_string_shared_init(&string->unescaped, content.start, content.end);
16731
- node = (pm_node_t *) string;
16732
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16733
- // If we get here, then we have an end of a label immediately
16734
- // after a start. In that case we'll create an empty symbol
16735
- // node.
16736
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16737
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16738
-
16739
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16740
- node = (pm_node_t *) symbol;
16741
- } else if (!lex_interpolation) {
16742
- // If we don't accept interpolation then we expect the string to
16743
- // start with a single string content node.
16744
- pm_string_t unescaped;
16745
- pm_token_t content;
16746
- if (match1(parser, PM_TOKEN_EOF)) {
16747
- unescaped = PM_STRING_EMPTY;
16748
- content = not_provided(parser);
16749
- } else {
16750
- unescaped = parser->current_string;
16751
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16752
- content = parser->previous;
16753
- }
16754
-
16755
- // It is unfortunately possible to have multiple string content
16756
- // nodes in a row in the case that there's heredoc content in
16757
- // the middle of the string, like this cursed example:
16758
- //
16759
- // <<-END+'b
16760
- // a
16761
- // END
16762
- // c'+'d'
16763
- //
16764
- // In that case we need to switch to an interpolated string to
16765
- // be able to contain all of the parts.
16766
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16767
- pm_node_list_t parts = { 0 };
16768
-
16769
- pm_token_t delimiters = not_provided(parser);
16770
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16771
- pm_node_list_append(&parts, part);
16772
-
16773
- do {
16774
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16775
- pm_node_list_append(&parts, part);
16776
- parser_lex(parser);
16777
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16778
-
16779
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16780
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16781
-
16782
- pm_node_list_free(&parts);
16783
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16784
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16785
- } else if (match1(parser, PM_TOKEN_EOF)) {
16786
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16787
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16788
- } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16789
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16790
- } else {
16791
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16792
- parser->previous.start = parser->previous.end;
16793
- parser->previous.type = PM_TOKEN_MISSING;
16794
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16795
- }
16796
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16797
- // In this case we've hit string content so we know the string
16798
- // at least has something in it. We'll need to check if the
16799
- // following token is the end (in which case we can return a
16800
- // plain string) or if it's not then it has interpolation.
16801
- pm_token_t content = parser->current;
16802
- pm_string_t unescaped = parser->current_string;
16803
- parser_lex(parser);
16804
-
16805
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16806
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16807
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
16808
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16809
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16810
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16811
- } else {
16812
- // If we get here, then we have interpolation so we'll need
16813
- // to create a string or symbol node with interpolation.
16814
- pm_node_list_t parts = { 0 };
16815
- pm_token_t string_opening = not_provided(parser);
16816
- pm_token_t string_closing = not_provided(parser);
16817
-
16818
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16819
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
16820
- pm_node_list_append(&parts, part);
16821
-
16822
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16823
- if ((part = parse_string_part(parser)) != NULL) {
16824
- pm_node_list_append(&parts, part);
16825
- }
16826
- }
16827
-
16828
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16829
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16830
- } else if (match1(parser, PM_TOKEN_EOF)) {
16831
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16832
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16833
- } else {
16834
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16835
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16836
- }
16837
-
16838
- pm_node_list_free(&parts);
16839
- }
16840
- } else {
16841
- // If we get here, then the first part of the string is not plain
16842
- // string content, in which case we need to parse the string as an
16843
- // interpolated string.
16844
- pm_node_list_t parts = { 0 };
16845
- pm_node_t *part;
16846
-
16847
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16848
- if ((part = parse_string_part(parser)) != NULL) {
16849
- pm_node_list_append(&parts, part);
16850
- }
16851
- }
16852
-
16853
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
16854
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16855
- } else if (match1(parser, PM_TOKEN_EOF)) {
16856
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16857
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16858
- } else {
16859
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16860
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16861
- }
16862
-
16863
- pm_node_list_free(&parts);
16864
- }
16865
-
16866
- if (current == NULL) {
16867
- // If the node we just parsed is a symbol node, then we can't
16868
- // concatenate it with anything else, so we can now return that
16869
- // node.
16870
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16871
- return node;
16872
- }
16873
-
16874
- // If we don't already have a node, then it's fine and we can just
16875
- // set the result to be the node we just parsed.
16876
- current = node;
16877
- } else {
16878
- // Otherwise we need to check the type of the node we just parsed.
16879
- // If it cannot be concatenated with the previous node, then we'll
16880
- // need to add a syntax error.
16881
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16882
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16883
- }
17173
+ node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17174
+ }
16884
17175
 
16885
- // If we haven't already created our container for concatenation,
16886
- // we'll do that now.
16887
- if (!concating) {
16888
- concating = true;
16889
- pm_token_t bounds = not_provided(parser);
17176
+ xfree(nodes.nodes);
17177
+ } else if (leading_rest) {
17178
+ // Otherwise, if we parsed a single splat pattern, then we know we have an
17179
+ // array pattern, so we can go ahead and create that node.
17180
+ node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17181
+ }
16890
17182
 
16891
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16892
- pm_interpolated_string_node_append(container, current);
16893
- current = (pm_node_t *) container;
16894
- }
17183
+ return node;
17184
+ }
16895
17185
 
16896
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
17186
+ /**
17187
+ * Incorporate a negative sign into a numeric node by subtracting 1 character
17188
+ * from its start bounds. If it's a compound node, then we will recursively
17189
+ * apply this function to its value.
17190
+ */
17191
+ static inline void
17192
+ parse_negative_numeric(pm_node_t *node) {
17193
+ switch (PM_NODE_TYPE(node)) {
17194
+ case PM_INTEGER_NODE: {
17195
+ pm_integer_node_t *cast = (pm_integer_node_t *) node;
17196
+ cast->base.location.start--;
17197
+ cast->value.negative = true;
17198
+ break;
17199
+ }
17200
+ case PM_FLOAT_NODE: {
17201
+ pm_float_node_t *cast = (pm_float_node_t *) node;
17202
+ cast->base.location.start--;
17203
+ cast->value = -cast->value;
17204
+ break;
17205
+ }
17206
+ case PM_RATIONAL_NODE: {
17207
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
17208
+ cast->base.location.start--;
17209
+ cast->numerator.negative = true;
17210
+ break;
16897
17211
  }
17212
+ case PM_IMAGINARY_NODE:
17213
+ node->location.start--;
17214
+ parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17215
+ break;
17216
+ default:
17217
+ assert(false && "unreachable");
17218
+ break;
16898
17219
  }
16899
-
16900
- return current;
16901
17220
  }
16902
17221
 
16903
17222
  /**
@@ -16912,6 +17231,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16912
17231
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16913
17232
  break;
16914
17233
  }
17234
+ case PM_ERR_HASH_VALUE:
17235
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17236
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17237
+ break;
17238
+ }
16915
17239
  case PM_ERR_UNARY_RECEIVER: {
16916
17240
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16917
17241
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -17090,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17090
17414
  }
17091
17415
  }
17092
17416
 
17417
+ /**
17418
+ * This struct is used to pass information between the regular expression parser
17419
+ * and the error callback.
17420
+ */
17421
+ typedef struct {
17422
+ /** The parser that we are parsing the regular expression for. */
17423
+ pm_parser_t *parser;
17424
+
17425
+ /** The start of the regular expression. */
17426
+ const uint8_t *start;
17427
+
17428
+ /** The end of the regular expression. */
17429
+ const uint8_t *end;
17430
+
17431
+ /**
17432
+ * Whether or not the source of the regular expression is shared. This
17433
+ * impacts the location of error messages, because if it is shared then we
17434
+ * can use the location directly and if it is not, then we use the bounds of
17435
+ * the regular expression itself.
17436
+ */
17437
+ bool shared;
17438
+ } parse_regular_expression_error_data_t;
17439
+
17440
+ /**
17441
+ * This callback is called when the regular expression parser encounters a
17442
+ * syntax error.
17443
+ */
17444
+ static void
17445
+ parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17446
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
17447
+ pm_location_t location;
17448
+
17449
+ if (callback_data->shared) {
17450
+ location = (pm_location_t) { .start = start, .end = end };
17451
+ } else {
17452
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17453
+ }
17454
+
17455
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17456
+ }
17457
+
17458
+ /**
17459
+ * Parse the errors for the regular expression and add them to the parser.
17460
+ */
17461
+ static void
17462
+ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17463
+ const pm_string_t *unescaped = &node->unescaped;
17464
+ parse_regular_expression_error_data_t error_data = {
17465
+ .parser = parser,
17466
+ .start = node->base.location.start,
17467
+ .end = node->base.location.end,
17468
+ .shared = unescaped->type == PM_STRING_SHARED
17469
+ };
17470
+
17471
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
17472
+ }
17473
+
17093
17474
  /**
17094
17475
  * Parse an expression that begins with the previous node that we just lexed.
17095
17476
  */
@@ -17110,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17110
17491
  break;
17111
17492
  }
17112
17493
 
17113
- if (pm_array_node_size(array) != 0) {
17114
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
17494
+ // Ensure that we have a comma between elements in the array.
17495
+ if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17496
+ const uint8_t *location = parser->previous.end;
17497
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17498
+
17499
+ parser->previous.start = location;
17500
+ parser->previous.type = PM_TOKEN_MISSING;
17115
17501
  }
17116
17502
 
17117
17503
  // If we have a right bracket immediately following a comma,
@@ -17289,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17289
17675
 
17290
17676
  // If we didn't find a terminator and we didn't find a right
17291
17677
  // parenthesis, then this is a syntax error.
17292
- if (!terminator_found) {
17678
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17293
17679
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17294
17680
  }
17295
17681
 
@@ -17318,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17318
17704
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17319
17705
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17320
17706
  break;
17321
- } else {
17707
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
17708
+ // If we're at the end of the file, then we're going to add
17709
+ // an error after this for the ) anyway.
17322
17710
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17323
17711
  }
17324
17712
  }
@@ -17537,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17537
17925
  ) {
17538
17926
  pm_arguments_t arguments = { 0 };
17539
17927
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
17540
-
17541
17928
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17929
+
17930
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17931
+ // If we're about to convert an 'it' implicit local
17932
+ // variable read into a method call, we need to remove
17933
+ // it from the list of implicit local variables.
17934
+ parse_target_implicit_parameter(parser, node);
17935
+ } else {
17936
+ // Otherwise, we're about to convert a regular local
17937
+ // variable read into a method call, in which case we
17938
+ // need to indicate that this was not a read for the
17939
+ // purposes of warnings.
17940
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17941
+
17942
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17943
+ parse_target_implicit_parameter(parser, node);
17944
+ } else {
17945
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
17946
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17947
+ }
17948
+ }
17949
+
17542
17950
  pm_node_destroy(parser, node);
17543
17951
  return (pm_node_t *) fcall;
17544
17952
  }
@@ -17546,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17546
17954
 
17547
17955
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17548
17956
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
17549
- } else {
17550
- // Check if `it` is not going to be assigned.
17551
- switch (parser->current.type) {
17552
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
17553
- case PM_TOKEN_AMPERSAND_EQUAL:
17554
- case PM_TOKEN_CARET_EQUAL:
17555
- case PM_TOKEN_EQUAL:
17556
- case PM_TOKEN_GREATER_GREATER_EQUAL:
17557
- case PM_TOKEN_LESS_LESS_EQUAL:
17558
- case PM_TOKEN_MINUS_EQUAL:
17559
- case PM_TOKEN_PARENTHESIS_RIGHT:
17560
- case PM_TOKEN_PERCENT_EQUAL:
17561
- case PM_TOKEN_PIPE_EQUAL:
17562
- case PM_TOKEN_PIPE_PIPE_EQUAL:
17563
- case PM_TOKEN_PLUS_EQUAL:
17564
- case PM_TOKEN_SLASH_EQUAL:
17565
- case PM_TOKEN_STAR_EQUAL:
17566
- case PM_TOKEN_STAR_STAR_EQUAL:
17567
- break;
17568
- default:
17569
- // Once we know it's neither a method call nor an
17570
- // assignment, we can finally create `it` default
17571
- // parameter.
17572
- node = pm_node_check_it(parser, node);
17573
- }
17574
17957
  }
17575
17958
 
17576
17959
  return node;
@@ -17831,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17831
18214
  // as frozen because when clause strings are frozen.
17832
18215
  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17833
18216
  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18217
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18218
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17834
18219
  }
17835
18220
 
17836
18221
  pm_when_clause_static_literals_add(parser, &literals, condition);
@@ -17887,7 +18272,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17887
18272
  pm_token_t in_keyword = parser->previous;
17888
18273
 
17889
18274
  pm_constant_id_list_t captures = { 0 };
17890
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18275
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17891
18276
 
17892
18277
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17893
18278
  pm_constant_id_list_free(&captures);
@@ -17916,7 +18301,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17916
18301
  then_keyword = not_provided(parser);
17917
18302
  }
17918
18303
  } else {
17919
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18304
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17920
18305
  then_keyword = parser->previous;
17921
18306
  }
17922
18307
 
@@ -18236,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18236
18621
 
18237
18622
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18238
18623
  receiver = parse_variable_call(parser);
18239
- receiver = pm_node_check_it(parser, receiver);
18240
18624
 
18241
18625
  pm_parser_scope_push(parser, true);
18242
18626
  lex_state_set(parser, PM_LEX_STATE_FNAME);
@@ -18370,7 +18754,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18370
18754
  lex_state_set(parser, PM_LEX_STATE_BEG);
18371
18755
  parser->command_start = true;
18372
18756
 
18373
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18757
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18758
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18759
+ parser->previous.start = parser->previous.end;
18760
+ parser->previous.type = PM_TOKEN_MISSING;
18761
+ }
18762
+
18374
18763
  rparen = parser->previous;
18375
18764
  break;
18376
18765
  }
@@ -18568,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18568
18957
  if (match1(parser, PM_TOKEN_COMMA)) {
18569
18958
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18570
18959
  } else {
18571
- index = parse_target(parser, index, false);
18960
+ index = parse_target(parser, index, false, false);
18572
18961
  }
18573
18962
 
18574
18963
  context_pop(parser);
@@ -19203,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19203
19592
  bool ascii_only = parser->current_regular_expression_ascii_only;
19204
19593
  parser_lex(parser);
19205
19594
 
19206
- // If we hit an end, then we can create a regular expression node
19207
- // without interpolation, which can be represented more succinctly and
19208
- // more easily compiled.
19595
+ // If we hit an end, then we can create a regular expression
19596
+ // node without interpolation, which can be represented more
19597
+ // succinctly and more easily compiled.
19209
19598
  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19210
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19211
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
19212
- return node;
19599
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19600
+
19601
+ // If we're not immediately followed by a =~, then we want
19602
+ // to parse all of the errors at this point. If it is
19603
+ // followed by a =~, then it will get parsed higher up while
19604
+ // parsing the named captures as well.
19605
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19606
+ parse_regular_expression_errors(parser, node);
19607
+ }
19608
+
19609
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
19610
+ return (pm_node_t *) node;
19213
19611
  }
19214
19612
 
19215
19613
  // If we get here, then we have interpolation so we'll need to create
@@ -19219,6 +19617,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19219
19617
  pm_token_t opening = not_provided(parser);
19220
19618
  pm_token_t closing = not_provided(parser);
19221
19619
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19620
+
19621
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19622
+ // This is extremely strange, but the first string part of a
19623
+ // regular expression will always be tagged as binary if we
19624
+ // are in a US-ASCII file, no matter its contents.
19625
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19626
+ }
19627
+
19222
19628
  pm_interpolated_regular_expression_node_append(interpolated, part);
19223
19629
  } else {
19224
19630
  // If the first part of the body of the regular expression is not a
@@ -19419,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19419
19825
 
19420
19826
  switch (parser->current.type) {
19421
19827
  case PM_TOKEN_PARENTHESIS_LEFT: {
19422
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19423
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19424
-
19425
19828
  pm_token_t opening = parser->current;
19426
19829
  parser_lex(parser);
19427
19830
 
@@ -19438,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19438
19841
  break;
19439
19842
  }
19440
19843
  case PM_CASE_PARAMETER: {
19441
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19442
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19443
-
19444
19844
  pm_accepts_block_stack_push(parser, false);
19445
19845
  pm_token_t opening = not_provided(parser);
19446
19846
  block_parameters = parse_block_parameters(parser, false, &opening, true);
@@ -19693,122 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19693
20093
  }
19694
20094
 
19695
20095
  /**
19696
- * Returns true if the name of the capture group is a valid local variable that
19697
- * can be written to.
20096
+ * This struct is used to pass information between the regular expression parser
20097
+ * and the named capture callback.
19698
20098
  */
19699
- static bool
19700
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19701
- if (length == 0) {
19702
- return false;
19703
- }
20099
+ typedef struct {
20100
+ /** The parser that is parsing the regular expression. */
20101
+ pm_parser_t *parser;
19704
20102
 
19705
- // First ensure that it starts with a valid identifier starting character.
19706
- size_t width = char_is_identifier_start(parser, source);
19707
- if (!width) {
19708
- return false;
19709
- }
20103
+ /** The call node wrapping the regular expression node. */
20104
+ pm_call_node_t *call;
19710
20105
 
19711
- // Next, ensure that it's not an uppercase character.
19712
- if (parser->encoding_changed) {
19713
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19714
- } else {
19715
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19716
- }
20106
+ /** The match write node that is being created. */
20107
+ pm_match_write_node_t *match;
19717
20108
 
19718
- // Next, iterate through all of the bytes of the string to ensure that they
19719
- // are all valid identifier characters.
19720
- const uint8_t *cursor = source + width;
19721
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19722
- cursor += width;
19723
- }
20109
+ /** The list of names that have been parsed. */
20110
+ pm_constant_id_list_t names;
19724
20111
 
19725
- return cursor == source + length;
19726
- }
20112
+ /**
20113
+ * Whether the content of the regular expression is shared. This impacts
20114
+ * whether or not we use owned constants or shared constants in the
20115
+ * constant pool for the names of the captures.
20116
+ */
20117
+ bool shared;
20118
+ } parse_regular_expression_named_capture_data_t;
19727
20119
 
19728
20120
  /**
19729
- * Potentially change a =~ with a regular expression with named captures into a
19730
- * match write node.
20121
+ * This callback is called when the regular expression parser encounters a named
20122
+ * capture group.
19731
20123
  */
19732
- static pm_node_t *
19733
- parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
19734
- pm_string_list_t named_captures = { 0 };
19735
- pm_node_t *result;
20124
+ static void
20125
+ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20126
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
19736
20127
 
19737
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
19738
- // Since we should not create a MatchWriteNode when all capture names
19739
- // are invalid, creating a MatchWriteNode is delayed here.
19740
- pm_match_write_node_t *match = NULL;
19741
- pm_constant_id_list_t names = { 0 };
20128
+ pm_parser_t *parser = callback_data->parser;
20129
+ pm_call_node_t *call = callback_data->call;
20130
+ pm_constant_id_list_t *names = &callback_data->names;
19742
20131
 
19743
- for (size_t index = 0; index < named_captures.length; index++) {
19744
- pm_string_t *string = &named_captures.strings[index];
20132
+ const uint8_t *source = pm_string_source(capture);
20133
+ size_t length = pm_string_length(capture);
19745
20134
 
19746
- const uint8_t *source = pm_string_source(string);
19747
- size_t length = pm_string_length(string);
20135
+ pm_location_t location;
20136
+ pm_constant_id_t name;
19748
20137
 
19749
- pm_location_t location;
19750
- pm_constant_id_t name;
20138
+ // If the name of the capture group isn't a valid identifier, we do
20139
+ // not add it to the local table.
20140
+ if (!pm_slice_is_valid_local(parser, source, source + length)) return;
19751
20141
 
19752
- // If the name of the capture group isn't a valid identifier, we do
19753
- // not add it to the local table.
19754
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
20142
+ if (callback_data->shared) {
20143
+ // If the unescaped string is a slice of the source, then we can
20144
+ // copy the names directly. The pointers will line up.
20145
+ location = (pm_location_t) { .start = source, .end = source + length };
20146
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
20147
+ } else {
20148
+ // Otherwise, the name is a slice of the malloc-ed owned string,
20149
+ // in which case we need to copy it out into a new string.
20150
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
19755
20151
 
19756
- if (content->type == PM_STRING_SHARED) {
19757
- // If the unescaped string is a slice of the source, then we can
19758
- // copy the names directly. The pointers will line up.
19759
- location = (pm_location_t) { .start = source, .end = source + length };
19760
- name = pm_parser_constant_id_location(parser, location.start, location.end);
19761
- } else {
19762
- // Otherwise, the name is a slice of the malloc-ed owned string,
19763
- // in which case we need to copy it out into a new string.
19764
- location = call->receiver->location;
20152
+ void *memory = xmalloc(length);
20153
+ if (memory == NULL) abort();
19765
20154
 
19766
- void *memory = xmalloc(length);
19767
- if (memory == NULL) abort();
20155
+ memcpy(memory, source, length);
20156
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20157
+ }
19768
20158
 
19769
- memcpy(memory, source, length);
19770
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
19771
- }
20159
+ // Add this name to the list of constants if it is valid, not duplicated,
20160
+ // and not a keyword.
20161
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20162
+ pm_constant_id_list_append(names, name);
19772
20163
 
19773
- if (name != 0) {
19774
- // We don't want to create duplicate targets if the capture name
19775
- // is duplicated.
19776
- if (pm_constant_id_list_includes(&names, name)) continue;
19777
- pm_constant_id_list_append(&names, name);
20164
+ int depth;
20165
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20166
+ // If the local is not already a local but it is a keyword, then we
20167
+ // do not want to add a capture for this.
20168
+ if (pm_local_is_keyword((const char *) source, length)) return;
19778
20169
 
19779
- int depth;
19780
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
19781
- // If the identifier is not already a local, then we'll add
19782
- // it to the local table unless it's a keyword.
19783
- if (pm_local_is_keyword((const char *) source, length)) continue;
20170
+ // If the identifier is not already a local, then we will add it to
20171
+ // the local table.
20172
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
20173
+ }
19784
20174
 
19785
- pm_parser_local_add(parser, name, location.start, location.end, 0);
19786
- }
20175
+ // Here we lazily create the MatchWriteNode since we know we're
20176
+ // about to add a target.
20177
+ if (callback_data->match == NULL) {
20178
+ callback_data->match = pm_match_write_node_create(parser, call);
20179
+ }
19787
20180
 
19788
- // Here we lazily create the MatchWriteNode since we know we're
19789
- // about to add a target.
19790
- if (match == NULL) match = pm_match_write_node_create(parser, call);
20181
+ // Next, create the local variable target and add it to the list of
20182
+ // targets for the match.
20183
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20184
+ pm_node_list_append(&callback_data->match->targets, target);
20185
+ }
20186
+ }
19791
20187
 
19792
- // Next, create the local variable target and add it to the
19793
- // list of targets for the match.
19794
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
19795
- pm_node_list_append(&match->targets, target);
19796
- }
19797
- }
20188
+ /**
20189
+ * Potentially change a =~ with a regular expression with named captures into a
20190
+ * match write node.
20191
+ */
20192
+ static pm_node_t *
20193
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
20194
+ parse_regular_expression_named_capture_data_t callback_data = {
20195
+ .parser = parser,
20196
+ .call = call,
20197
+ .names = { 0 },
20198
+ .shared = content->type == PM_STRING_SHARED
20199
+ };
19798
20200
 
19799
- if (match != NULL) {
19800
- result = (pm_node_t *) match;
19801
- } else {
19802
- result = (pm_node_t *) call;
19803
- }
20201
+ parse_regular_expression_error_data_t error_data = {
20202
+ .parser = parser,
20203
+ .start = call->receiver->location.start,
20204
+ .end = call->receiver->location.end,
20205
+ .shared = content->type == PM_STRING_SHARED
20206
+ };
20207
+
20208
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20209
+ pm_constant_id_list_free(&callback_data.names);
19804
20210
 
19805
- pm_constant_id_list_free(&names);
20211
+ if (callback_data.match != NULL) {
20212
+ return (pm_node_t *) callback_data.match;
19806
20213
  } else {
19807
- result = (pm_node_t *) call;
20214
+ return (pm_node_t *) call;
19808
20215
  }
19809
-
19810
- pm_string_list_free(&named_captures);
19811
- return result;
19812
20216
  }
19813
20217
 
19814
20218
  static inline pm_node_t *
@@ -19925,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19925
20329
  return result;
19926
20330
  }
19927
20331
  case PM_CALL_NODE: {
19928
- parser_lex(parser);
19929
20332
  pm_call_node_t *cast = (pm_call_node_t *) node;
19930
20333
 
19931
20334
  // If we have a vcall (a method with no arguments and no
@@ -19936,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19936
20339
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
19937
20340
 
19938
20341
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20342
+ parser_lex(parser);
20343
+
19939
20344
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
19940
20345
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
19941
20346
 
@@ -19943,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19943
20348
  return result;
19944
20349
  }
19945
20350
 
20351
+ // Move past the token here so that we have already added
20352
+ // the local variable by this point.
20353
+ parser_lex(parser);
20354
+
19946
20355
  // If there is no call operator and the message is "[]" then
19947
20356
  // this is an aref expression, and we can transform it into
19948
20357
  // an aset expression.
@@ -20038,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20038
20447
  return result;
20039
20448
  }
20040
20449
  case PM_CALL_NODE: {
20041
- parser_lex(parser);
20042
20450
  pm_call_node_t *cast = (pm_call_node_t *) node;
20043
20451
 
20044
20452
  // If we have a vcall (a method with no arguments and no
@@ -20049,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20049
20457
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20050
20458
 
20051
20459
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20460
+ parser_lex(parser);
20461
+
20052
20462
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20053
20463
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20054
20464
 
@@ -20056,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20056
20466
  return result;
20057
20467
  }
20058
20468
 
20469
+ // Move past the token here so that we have already added
20470
+ // the local variable by this point.
20471
+ parser_lex(parser);
20472
+
20059
20473
  // If there is no call operator and the message is "[]" then
20060
20474
  // this is an aref expression, and we can transform it into
20061
20475
  // an aset expression.
@@ -20209,7 +20623,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20209
20623
  // In this case we have an operator but we don't know what it's for.
20210
20624
  // We need to treat it as an error. For now, we'll mark it as an error
20211
20625
  // and just skip right past it.
20212
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20626
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
20213
20627
  return node;
20214
20628
  }
20215
20629
  }
@@ -20465,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20465
20879
 
20466
20880
  if (
20467
20881
  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
20468
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
20882
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
20469
20883
  ) {
20470
20884
  // If we have a constant immediately following a '::' operator, then
20471
20885
  // this can either be a constant path or a method call, depending on
@@ -20591,7 +21005,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20591
21005
  parser_lex(parser);
20592
21006
 
20593
21007
  pm_constant_id_list_t captures = { 0 };
20594
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
21008
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20595
21009
 
20596
21010
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20597
21011
  pm_constant_id_list_free(&captures);
@@ -20608,7 +21022,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20608
21022
  parser_lex(parser);
20609
21023
 
20610
21024
  pm_constant_id_list_t captures = { 0 };
20611
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
21025
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20612
21026
 
20613
21027
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20614
21028
  pm_constant_id_list_free(&captures);
@@ -20621,6 +21035,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20621
21035
  }
20622
21036
  }
20623
21037
 
21038
+ #undef PM_PARSE_PATTERN_SINGLE
21039
+ #undef PM_PARSE_PATTERN_TOP
21040
+ #undef PM_PARSE_PATTERN_MULTI
21041
+
20624
21042
  /**
20625
21043
  * Parse an expression at the given point of the parser using the given binding
20626
21044
  * power to parse subsequent chains. If this function finds a syntax error, it
@@ -21004,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
21004
21422
 
21005
21423
  // Scopes given from the outside are not allowed to have numbered
21006
21424
  // parameters.
21007
- parser->current_scope->numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED;
21425
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
21008
21426
 
21009
21427
  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
21010
21428
  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -21392,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
21392
21810
  }
21393
21811
 
21394
21812
  #endif
21395
-
21396
- /** An error that is going to be formatted into the output. */
21397
- typedef struct {
21398
- /** A pointer to the diagnostic that was generated during parsing. */
21399
- pm_diagnostic_t *error;
21400
-
21401
- /** The start line of the diagnostic message. */
21402
- int32_t line;
21403
-
21404
- /** The column start of the diagnostic message. */
21405
- uint32_t column_start;
21406
-
21407
- /** The column end of the diagnostic message. */
21408
- uint32_t column_end;
21409
- } pm_error_t;
21410
-
21411
- /** The format that will be used to format the errors into the output. */
21412
- typedef struct {
21413
- /** The prefix that will be used for line numbers. */
21414
- const char *number_prefix;
21415
-
21416
- /** The prefix that will be used for blank lines. */
21417
- const char *blank_prefix;
21418
-
21419
- /** The divider that will be used between sections of source code. */
21420
- const char *divider;
21421
-
21422
- /** The length of the blank prefix. */
21423
- size_t blank_prefix_length;
21424
-
21425
- /** The length of the divider. */
21426
- size_t divider_length;
21427
- } pm_error_format_t;
21428
-
21429
- #define PM_COLOR_GRAY "\033[38;5;102m"
21430
- #define PM_COLOR_RED "\033[1;31m"
21431
- #define PM_COLOR_RESET "\033[m"
21432
-
21433
- static inline pm_error_t *
21434
- pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
21435
- pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
21436
- if (errors == NULL) return NULL;
21437
-
21438
- int32_t start_line = parser->start_line;
21439
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
21440
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
21441
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
21442
-
21443
- // We're going to insert this error into the array in sorted order. We
21444
- // do this by finding the first error that has a line number greater
21445
- // than the current error and then inserting the current error before
21446
- // that one.
21447
- size_t index = 0;
21448
- while (
21449
- (index < error_list->size) &&
21450
- (errors[index].error != NULL) &&
21451
- (
21452
- (errors[index].line < start.line) ||
21453
- ((errors[index].line == start.line) && (errors[index].column_start < start.column))
21454
- )
21455
- ) index++;
21456
-
21457
- // Now we're going to shift all of the errors after this one down one
21458
- // index to make room for the new error.
21459
- if (index + 1 < error_list->size) {
21460
- memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
21461
- }
21462
-
21463
- // Finally, we'll insert the error into the array.
21464
- uint32_t column_end;
21465
- if (start.line == end.line) {
21466
- column_end = end.column;
21467
- } else {
21468
- column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
21469
- }
21470
-
21471
- // Ensure we have at least one column of error.
21472
- if (start.column == column_end) column_end++;
21473
-
21474
- errors[index] = (pm_error_t) {
21475
- .error = error,
21476
- .line = start.line,
21477
- .column_start = start.column,
21478
- .column_end = column_end
21479
- };
21480
- }
21481
-
21482
- return errors;
21483
- }
21484
-
21485
- static inline void
21486
- pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
21487
- int32_t line_delta = line - parser->start_line;
21488
- assert(line_delta >= 0);
21489
-
21490
- size_t index = (size_t) line_delta;
21491
- assert(index < newline_list->size);
21492
-
21493
- const uint8_t *start = &parser->start[newline_list->offsets[index]];
21494
- const uint8_t *end;
21495
-
21496
- if (index >= newline_list->size - 1) {
21497
- end = parser->end;
21498
- } else {
21499
- end = &parser->start[newline_list->offsets[index + 1]];
21500
- }
21501
-
21502
- pm_buffer_append_format(buffer, number_prefix, line);
21503
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
21504
-
21505
- if (end == parser->end && end[-1] != '\n') {
21506
- pm_buffer_append_string(buffer, "\n", 1);
21507
- }
21508
- }
21509
-
21510
- /**
21511
- * Format the errors on the parser into the given buffer.
21512
- */
21513
- PRISM_EXPORTED_FUNCTION void
21514
- pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
21515
- assert(error_list->size != 0);
21516
-
21517
- // First, we're going to sort all of the errors by line number using an
21518
- // insertion sort into a newly allocated array.
21519
- const int32_t start_line = parser->start_line;
21520
- const pm_newline_list_t *newline_list = &parser->newline_list;
21521
-
21522
- pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
21523
- if (errors == NULL) return;
21524
-
21525
- // Now we're going to determine how we're going to format line numbers and
21526
- // blank lines based on the maximum number of digits in the line numbers
21527
- // that are going to be displayed.
21528
- pm_error_format_t error_format;
21529
- int32_t first_line_number = errors[0].line;
21530
- int32_t last_line_number = errors[error_list->size - 1].line;
21531
-
21532
- // If we have a maximum line number that is negative, then we're going to
21533
- // use the absolute value for comparison but multiply by 10 to additionally
21534
- // have a column for the negative sign.
21535
- if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
21536
- if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
21537
- int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
21538
-
21539
- if (max_line_number < 10) {
21540
- if (colorize) {
21541
- error_format = (pm_error_format_t) {
21542
- .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
21543
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21544
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
21545
- };
21546
- } else {
21547
- error_format = (pm_error_format_t) {
21548
- .number_prefix = "%1" PRIi32 " | ",
21549
- .blank_prefix = " | ",
21550
- .divider = " ~~~~~\n"
21551
- };
21552
- }
21553
- } else if (max_line_number < 100) {
21554
- if (colorize) {
21555
- error_format = (pm_error_format_t) {
21556
- .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
21557
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21558
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
21559
- };
21560
- } else {
21561
- error_format = (pm_error_format_t) {
21562
- .number_prefix = "%2" PRIi32 " | ",
21563
- .blank_prefix = " | ",
21564
- .divider = " ~~~~~~\n"
21565
- };
21566
- }
21567
- } else if (max_line_number < 1000) {
21568
- if (colorize) {
21569
- error_format = (pm_error_format_t) {
21570
- .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
21571
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21572
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
21573
- };
21574
- } else {
21575
- error_format = (pm_error_format_t) {
21576
- .number_prefix = "%3" PRIi32 " | ",
21577
- .blank_prefix = " | ",
21578
- .divider = " ~~~~~~~\n"
21579
- };
21580
- }
21581
- } else if (max_line_number < 10000) {
21582
- if (colorize) {
21583
- error_format = (pm_error_format_t) {
21584
- .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
21585
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21586
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21587
- };
21588
- } else {
21589
- error_format = (pm_error_format_t) {
21590
- .number_prefix = "%4" PRIi32 " | ",
21591
- .blank_prefix = " | ",
21592
- .divider = " ~~~~~~~~\n"
21593
- };
21594
- }
21595
- } else {
21596
- if (colorize) {
21597
- error_format = (pm_error_format_t) {
21598
- .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
21599
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21600
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21601
- };
21602
- } else {
21603
- error_format = (pm_error_format_t) {
21604
- .number_prefix = "%5" PRIi32 " | ",
21605
- .blank_prefix = " | ",
21606
- .divider = " ~~~~~~~~\n"
21607
- };
21608
- }
21609
- }
21610
-
21611
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
21612
- error_format.divider_length = strlen(error_format.divider);
21613
-
21614
- // Now we're going to iterate through every error in our error list and
21615
- // display it. While we're iterating, we will display some padding lines of
21616
- // the source before the error to give some context. We'll be careful not to
21617
- // display the same line twice in case the errors are close enough in the
21618
- // source.
21619
- int32_t last_line = parser->start_line - 1;
21620
- const pm_encoding_t *encoding = parser->encoding;
21621
-
21622
- for (size_t index = 0; index < error_list->size; index++) {
21623
- pm_error_t *error = &errors[index];
21624
-
21625
- // Here we determine how many lines of padding of the source to display,
21626
- // based on the difference from the last line that was displayed.
21627
- if (error->line - last_line > 1) {
21628
- if (error->line - last_line > 2) {
21629
- if ((index != 0) && (error->line - last_line > 3)) {
21630
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
21631
- }
21632
-
21633
- pm_buffer_append_string(buffer, " ", 2);
21634
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
21635
- }
21636
-
21637
- pm_buffer_append_string(buffer, " ", 2);
21638
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
21639
- }
21640
-
21641
- // If this is the first error or we're on a new line, then we'll display
21642
- // the line that has the error in it.
21643
- if ((index == 0) || (error->line != last_line)) {
21644
- if (colorize) {
21645
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
21646
- } else {
21647
- pm_buffer_append_string(buffer, "> ", 2);
21648
- }
21649
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
21650
- }
21651
-
21652
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
21653
- if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
21654
-
21655
- // Now we'll display the actual error message. We'll do this by first
21656
- // putting the prefix to the line, then a bunch of blank spaces
21657
- // depending on the column, then as many carets as we need to display
21658
- // the width of the error, then the error message itself.
21659
- //
21660
- // Note that this doesn't take into account the width of the actual
21661
- // character when displaid in the terminal. For some east-asian
21662
- // languages or emoji, this means it can be thrown off pretty badly. We
21663
- // will need to solve this eventually.
21664
- pm_buffer_append_string(buffer, " ", 2);
21665
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21666
-
21667
- size_t column = 0;
21668
- while (column < error->column_start) {
21669
- pm_buffer_append_byte(buffer, ' ');
21670
-
21671
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21672
- column += (char_width == 0 ? 1 : char_width);
21673
- }
21674
-
21675
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21676
- pm_buffer_append_byte(buffer, '^');
21677
-
21678
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21679
- column += (char_width == 0 ? 1 : char_width);
21680
-
21681
- while (column < error->column_end) {
21682
- pm_buffer_append_byte(buffer, '~');
21683
-
21684
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21685
- column += (char_width == 0 ? 1 : char_width);
21686
- }
21687
-
21688
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21689
-
21690
- if (inline_messages) {
21691
- pm_buffer_append_byte(buffer, ' ');
21692
- assert(error->error != NULL);
21693
-
21694
- const char *message = error->error->message;
21695
- pm_buffer_append_string(buffer, message, strlen(message));
21696
- }
21697
-
21698
- pm_buffer_append_byte(buffer, '\n');
21699
-
21700
- // Here we determine how many lines of padding to display after the
21701
- // error, depending on where the next error is in source.
21702
- last_line = error->line;
21703
- int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
21704
-
21705
- if (next_line - last_line > 1) {
21706
- pm_buffer_append_string(buffer, " ", 2);
21707
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21708
- }
21709
-
21710
- if (next_line - last_line > 1) {
21711
- pm_buffer_append_string(buffer, " ", 2);
21712
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21713
- }
21714
- }
21715
-
21716
- // Finally, we'll free the array of errors that we allocated.
21717
- xfree(errors);
21718
- }
21719
-
21720
- #undef PM_COLOR_GRAY
21721
- #undef PM_COLOR_RED
21722
- #undef PM_COLOR_RESET