prism 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +95 -26
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +70 -52
  9. data/ext/prism/extconf.rb +27 -23
  10. data/ext/prism/extension.c +107 -372
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +170 -102
  13. data/include/prism/diagnostic.h +18 -3
  14. data/include/prism/node.h +0 -21
  15. data/include/prism/parser.h +23 -25
  16. data/include/prism/regexp.h +17 -8
  17. data/include/prism/static_literals.h +3 -2
  18. data/include/prism/util/pm_char.h +1 -2
  19. data/include/prism/util/pm_constant_pool.h +0 -8
  20. data/include/prism/util/pm_integer.h +16 -9
  21. data/include/prism/util/pm_string.h +0 -8
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +0 -11
  24. data/lib/prism/compiler.rb +3 -0
  25. data/lib/prism/desugar_compiler.rb +4 -4
  26. data/lib/prism/dispatcher.rb +14 -0
  27. data/lib/prism/dot_visitor.rb +54 -35
  28. data/lib/prism/dsl.rb +23 -18
  29. data/lib/prism/ffi.rb +25 -4
  30. data/lib/prism/inspect_visitor.rb +26 -24
  31. data/lib/prism/mutation_compiler.rb +6 -1
  32. data/lib/prism/node.rb +314 -389
  33. data/lib/prism/node_ext.rb +175 -17
  34. data/lib/prism/parse_result/comments.rb +1 -8
  35. data/lib/prism/parse_result/newlines.rb +102 -12
  36. data/lib/prism/parse_result.rb +17 -0
  37. data/lib/prism/reflection.rb +11 -9
  38. data/lib/prism/serialize.rb +91 -68
  39. data/lib/prism/translation/parser/compiler.rb +288 -138
  40. data/lib/prism/translation/parser.rb +7 -2
  41. data/lib/prism/translation/ripper.rb +24 -22
  42. data/lib/prism/translation/ruby_parser.rb +32 -14
  43. data/lib/prism/visitor.rb +3 -0
  44. data/lib/prism.rb +0 -4
  45. data/prism.gemspec +2 -4
  46. data/rbi/prism/node.rbi +114 -57
  47. data/rbi/prism/node_ext.rbi +5 -0
  48. data/rbi/prism/parse_result.rbi +1 -1
  49. data/rbi/prism/visitor.rbi +3 -0
  50. data/rbi/prism.rbi +6 -0
  51. data/sig/prism/dsl.rbs +13 -10
  52. data/sig/prism/lex_compat.rbs +10 -0
  53. data/sig/prism/mutation_compiler.rbs +1 -0
  54. data/sig/prism/node.rbs +72 -48
  55. data/sig/prism/node_ext.rbs +4 -0
  56. data/sig/prism/visitor.rbs +1 -0
  57. data/sig/prism.rbs +21 -0
  58. data/src/diagnostic.c +56 -27
  59. data/src/node.c +432 -1690
  60. data/src/prettyprint.c +97 -54
  61. data/src/prism.c +1286 -1196
  62. data/src/regexp.c +133 -68
  63. data/src/serialize.c +22 -17
  64. data/src/static_literals.c +63 -84
  65. data/src/token_type.c +4 -4
  66. data/src/util/pm_constant_pool.c +0 -8
  67. data/src/util/pm_integer.c +39 -11
  68. data/src/util/pm_string.c +0 -12
  69. data/src/util/pm_strpbrk.c +32 -6
  70. metadata +3 -5
  71. data/include/prism/util/pm_string_list.h +0 -44
  72. data/lib/prism/debug.rb +0 -249
  73. data/src/util/pm_string_list.c +0 -28
data/src/prism.c CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
423
423
  * This is the equivalent of IS_lex_state in CRuby.
424
424
  */
425
425
  static inline bool
426
- lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
426
+ lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
427
427
  return parser->lex_state & state;
428
428
  }
429
429
 
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
708
708
  .previous = parser->current_scope,
709
709
  .locals = { 0 },
710
710
  .parameters = PM_SCOPE_PARAMETERS_NONE,
711
- .numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_NONE,
711
+ .implicit_parameters = { 0 },
712
712
  .shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
713
713
  .closed = closed
714
714
  };
@@ -749,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
749
749
  return scope;
750
750
  }
751
751
 
752
- static void
753
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
754
760
  pm_scope_t *scope = parser->current_scope;
755
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
756
764
  if (scope->parameters & mask) {
757
- if (!scope->closed) {
758
- pm_parser_err_token(parser, token, diag);
759
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
760
771
  }
761
- return;
772
+
773
+ conflict = true;
762
774
  }
775
+
763
776
  if (scope->closed) break;
764
777
  scope = scope->previous;
765
778
  }
766
779
 
767
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
768
781
  }
769
782
 
770
- static inline void
783
+ static void
771
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
772
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
773
796
  }
774
797
 
775
- static inline void
798
+ static void
776
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
777
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
778
811
  }
779
812
 
780
- static inline void
781
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
782
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
783
828
  }
784
829
 
785
- static inline void
830
+ static void
786
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
787
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
788
843
  }
789
844
 
790
845
  /**
@@ -1128,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
1128
1183
  return NULL;
1129
1184
  case PM_BEGIN_NODE: {
1130
1185
  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
+
1187
+ if (cast->statements == NULL && cast->ensure_clause != NULL) {
1188
+ node = (pm_node_t *) cast->ensure_clause;
1189
+ }
1190
+ else {
1191
+ if (cast->rescue_clause != NULL) {
1192
+ if (cast->rescue_clause->statements == NULL) {
1193
+ return NULL;
1194
+ }
1195
+ else if (cast->else_clause != NULL) {
1196
+ node = (pm_node_t *) cast->else_clause;
1197
+ }
1198
+ else {
1199
+ node = (pm_node_t *) cast->statements;
1200
+ }
1201
+ }
1202
+ else {
1203
+ node = (pm_node_t *) cast->statements;
1204
+ }
1205
+ }
1206
+
1207
+ break;
1208
+ }
1209
+ case PM_ENSURE_NODE: {
1210
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1131
1211
  node = (pm_node_t *) cast->statements;
1132
1212
  break;
1133
1213
  }
@@ -1575,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
1575
1655
  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1576
1656
  }
1577
1657
 
1578
- #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
1658
+ #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
1579
1659
  #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
1580
1660
  #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
1581
1661
  #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
@@ -1703,7 +1783,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1703
1783
  * it's important that it be as fast as possible.
1704
1784
  */
1705
1785
  static inline size_t
1706
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1786
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1707
1787
  if (parser->encoding_changed) {
1708
1788
  size_t width;
1709
1789
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -2772,8 +2852,7 @@ static pm_call_node_t *
2772
2852
  pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2773
2853
  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2774
2854
 
2775
- node->base.location.start = parser->start;
2776
- node->base.location.end = parser->start;
2855
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
2777
2856
  node->arguments = arguments;
2778
2857
 
2779
2858
  node->name = name;
@@ -3025,8 +3104,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3025
3104
  .message_loc = target->message_loc,
3026
3105
  .read_name = 0,
3027
3106
  .write_name = target->name,
3028
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3029
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3107
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3108
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3030
3109
  .value = value
3031
3110
  };
3032
3111
 
@@ -3064,8 +3143,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3064
3143
  .arguments = target->arguments,
3065
3144
  .closing_loc = target->closing_loc,
3066
3145
  .block = target->block,
3067
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3068
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3146
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3147
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3069
3148
  .value = value
3070
3149
  };
3071
3150
 
@@ -3409,9 +3488,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3409
3488
  },
3410
3489
  .name = target->name,
3411
3490
  .name_loc = target->base.location,
3412
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3491
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3413
3492
  .value = value,
3414
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3493
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3415
3494
  };
3416
3495
 
3417
3496
  return node;
@@ -3525,9 +3604,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3525
3604
  }
3526
3605
  },
3527
3606
  .target = target,
3528
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3607
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3529
3608
  .value = value,
3530
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3609
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3531
3610
  };
3532
3611
 
3533
3612
  return node;
@@ -3652,9 +3731,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3652
3731
  },
3653
3732
  .name = target->name,
3654
3733
  .name_loc = target->base.location,
3655
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3734
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3656
3735
  .value = value,
3657
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3736
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3658
3737
  };
3659
3738
 
3660
3739
  return node;
@@ -4236,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4236
4315
  }
4237
4316
 
4238
4317
  /**
4239
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
4318
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
4240
4319
  */
4241
4320
  static pm_rational_node_t *
4242
4321
  pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4246,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4246
4325
  *node = (pm_rational_node_t) {
4247
4326
  {
4248
4327
  .type = PM_RATIONAL_NODE,
4249
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
4328
+ .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4250
4329
  .location = PM_LOCATION_TOKEN_VALUE(token)
4251
4330
  },
4252
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4253
- .type = PM_TOKEN_FLOAT,
4254
- .start = token->start,
4255
- .end = token->end - 1
4256
- }))
4331
+ .numerator = { 0 },
4332
+ .denominator = { 0 }
4257
4333
  };
4258
4334
 
4335
+ const uint8_t *start = token->start;
4336
+ const uint8_t *end = token->end - 1; // r
4337
+
4338
+ while (start < end && *start == '0') start++; // 0.1 -> .1
4339
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4340
+
4341
+ size_t length = (size_t) (end - start);
4342
+ if (length == 1) {
4343
+ node->denominator.value = 1;
4344
+ return node;
4345
+ }
4346
+
4347
+ const uint8_t *point = memchr(start, '.', length);
4348
+ assert(point && "should have a decimal point");
4349
+
4350
+ uint8_t *digits = malloc(length);
4351
+ if (digits == NULL) {
4352
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4353
+ abort();
4354
+ }
4355
+
4356
+ memcpy(digits, start, (unsigned long) (point - start));
4357
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4358
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4359
+
4360
+ digits[0] = '1';
4361
+ if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4362
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4363
+ free(digits);
4364
+
4365
+ pm_integers_reduce(&node->numerator, &node->denominator);
4259
4366
  return node;
4260
4367
  }
4261
4368
 
@@ -4505,9 +4612,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4505
4612
  },
4506
4613
  .name = pm_global_variable_write_name(parser, target),
4507
4614
  .name_loc = target->location,
4508
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4615
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4509
4616
  .value = value,
4510
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4617
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4511
4618
  };
4512
4619
 
4513
4620
  return node;
@@ -4566,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
4566
4673
  *node = (pm_global_variable_read_node_t) {
4567
4674
  {
4568
4675
  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4569
- .location = { .start = parser->start, .end = parser->start }
4676
+ .location = PM_LOCATION_NULL_VALUE(parser)
4570
4677
  },
4571
4678
  .name = name
4572
4679
  };
@@ -4608,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
4608
4715
  *node = (pm_global_variable_write_node_t) {
4609
4716
  {
4610
4717
  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4611
- .location = { .start = parser->start, .end = parser->start }
4718
+ .location = PM_LOCATION_NULL_VALUE(parser)
4612
4719
  },
4613
4720
  .name = name,
4614
- .name_loc = { .start = parser->start, .end = parser->start },
4615
- .operator_loc = { .start = parser->start, .end = parser->start },
4721
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
4722
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4616
4723
  .value = value
4617
4724
  };
4618
4725
 
@@ -4889,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
4889
4996
  }
4890
4997
 
4891
4998
  /**
4892
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
4999
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
4893
5000
  * token.
4894
5001
  */
4895
5002
  static pm_rational_node_t *
@@ -4900,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
4900
5007
  *node = (pm_rational_node_t) {
4901
5008
  {
4902
5009
  .type = PM_RATIONAL_NODE,
4903
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5010
+ .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4904
5011
  .location = PM_LOCATION_TOKEN_VALUE(token)
4905
5012
  },
4906
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4907
- .type = PM_TOKEN_INTEGER,
4908
- .start = token->start,
4909
- .end = token->end - 1
4910
- }))
5013
+ .numerator = { 0 },
5014
+ .denominator = { .value = 1, 0 }
4911
5015
  };
4912
5016
 
5017
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
5018
+ switch (base) {
5019
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
5020
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
5021
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
5022
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
5023
+ default: assert(false && "unreachable"); break;
5024
+ }
5025
+
5026
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
5027
+
4913
5028
  return node;
4914
5029
  }
4915
5030
 
@@ -5013,9 +5128,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
5013
5128
  },
5014
5129
  .name = target->name,
5015
5130
  .name_loc = target->base.location,
5016
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5131
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5017
5132
  .value = value,
5018
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5133
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5019
5134
  };
5020
5135
 
5021
5136
  return node;
@@ -5407,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
5407
5522
  node->base.location.end = closing->end;
5408
5523
  }
5409
5524
 
5525
+ /**
5526
+ * Create a local variable read that is reading the implicit 'it' variable.
5527
+ */
5528
+ static pm_it_local_variable_read_node_t *
5529
+ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5530
+ pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
5531
+
5532
+ *node = (pm_it_local_variable_read_node_t) {
5533
+ {
5534
+ .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5535
+ .location = PM_LOCATION_TOKEN_VALUE(name)
5536
+ }
5537
+ };
5538
+
5539
+ return node;
5540
+ }
5541
+
5410
5542
  /**
5411
5543
  * Allocate and initialize a new ItParametersNode node.
5412
5544
  */
@@ -5609,10 +5741,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5609
5741
  }
5610
5742
  },
5611
5743
  .name_loc = target->location,
5612
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5744
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5613
5745
  .value = value,
5614
5746
  .name = name,
5615
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5747
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5616
5748
  .depth = depth
5617
5749
  };
5618
5750
 
@@ -5719,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5719
5851
  return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5720
5852
  }
5721
5853
 
5722
- /**
5723
- * Returns true if the given node is `it` default parameter.
5724
- */
5725
- static inline bool
5726
- pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
5727
- // Check if it's a local variable reference
5728
- if (node->type != PM_CALL_NODE) {
5729
- return false;
5730
- }
5731
-
5732
- // Check if it's a variable call
5733
- pm_call_node_t *call_node = (pm_call_node_t *) node;
5734
- if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
5735
- return false;
5736
- }
5737
-
5738
- // Check if it's called `it`
5739
- pm_constant_id_t id = ((pm_call_node_t *)node)->name;
5740
- pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
5741
- return pm_token_is_it(constant->start, constant->start + constant->length);
5742
- }
5743
-
5744
5854
  /**
5745
5855
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
5746
5856
  * are of the form /^_\d$/).
@@ -6891,7 +7001,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6891
7001
  case PM_REDO_NODE:
6892
7002
  case PM_RETRY_NODE:
6893
7003
  case PM_RETURN_NODE:
6894
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
7004
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6895
7005
  break;
6896
7006
  default:
6897
7007
  break;
@@ -7300,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7300
7410
  {
7301
7411
  .type = PM_SYMBOL_NODE,
7302
7412
  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7303
- .location = { .start = parser->start, .end = parser->start }
7413
+ .location = PM_LOCATION_NULL_VALUE(parser)
7304
7414
  },
7305
- .value_loc = { .start = parser->start, .end = parser->start },
7415
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
7306
7416
  .unescaped = { 0 }
7307
7417
  };
7308
7418
 
@@ -7703,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7703
7813
  *node = (pm_while_node_t) {
7704
7814
  {
7705
7815
  .type = PM_WHILE_NODE,
7706
- .location = { .start = parser->start, .end = parser->start }
7816
+ .location = PM_LOCATION_NULL_VALUE(parser)
7707
7817
  },
7708
- .keyword_loc = { .start = parser->start, .end = parser->start },
7709
- .closing_loc = { .start = parser->start, .end = parser->start },
7818
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7819
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7710
7820
  .predicate = predicate,
7711
7821
  .statements = statements
7712
7822
  };
@@ -7861,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
7861
7971
  return constant_id;
7862
7972
  }
7863
7973
 
7864
- /**
7865
- * Create a local variable read that is reading the implicit 'it' variable.
7866
- */
7867
- static pm_local_variable_read_node_t *
7868
- pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
7869
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
7870
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
7871
- return NULL;
7872
- }
7873
-
7874
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
7875
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
7876
- return NULL;
7877
- }
7878
-
7879
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
7880
-
7881
- pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
7882
- pm_parser_local_add(parser, name_id, name->start, name->end, 0);
7883
-
7884
- return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
7885
- }
7886
-
7887
- /**
7888
- * Convert a `it` variable call node to a node for `it` default parameter.
7889
- */
7890
- static pm_node_t *
7891
- pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7892
- if (
7893
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7894
- !parser->current_scope->closed &&
7895
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7896
- pm_node_is_it(parser, node)
7897
- ) {
7898
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
7899
-
7900
- if (read != NULL) {
7901
- pm_node_destroy(parser, node);
7902
- node = (pm_node_t *) read;
7903
- }
7904
- }
7905
-
7906
- return node;
7907
- }
7908
-
7909
7974
  /**
7910
7975
  * Add a parameter name to the current scope and check whether the name of the
7911
7976
  * parameter is unique or not.
@@ -7941,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
7941
8006
  pm_scope_t *scope = parser->current_scope;
7942
8007
  parser->current_scope = scope->previous;
7943
8008
  pm_locals_free(&scope->locals);
8009
+ pm_node_list_free(&scope->implicit_parameters);
7944
8010
  xfree(scope);
7945
8011
  }
7946
8012
 
@@ -8012,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
8012
8078
  * is beyond the end of the source then return '\0'.
8013
8079
  */
8014
8080
  static inline uint8_t
8015
- peek_at(pm_parser_t *parser, const uint8_t *cursor) {
8081
+ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8016
8082
  if (cursor < parser->end) {
8017
8083
  return *cursor;
8018
8084
  } else {
@@ -8035,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8035
8101
  * that position is beyond the end of the source then return '\0'.
8036
8102
  */
8037
8103
  static inline uint8_t
8038
- peek(pm_parser_t *parser) {
8104
+ peek(const pm_parser_t *parser) {
8039
8105
  return peek_at(parser, parser->current.end);
8040
8106
  }
8041
8107
 
@@ -8100,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
8100
8166
  return memchr(cursor, '\n', (size_t) length);
8101
8167
  }
8102
8168
 
8169
+ /**
8170
+ * This is equivalent to the predicate of warn_balanced in CRuby.
8171
+ */
8172
+ static inline bool
8173
+ ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8174
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8175
+ }
8176
+
8103
8177
  /**
8104
8178
  * Here we're going to check if this is a "magic" comment, and perform whatever
8105
8179
  * actions are necessary for it here.
@@ -8339,7 +8413,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8339
8413
  // If we have hit a ractor pragma, attempt to lex that.
8340
8414
  uint32_t value_length = (uint32_t) (value_end - value_start);
8341
8415
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8342
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8416
+ const uint8_t *cursor = parser->current.start;
8417
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8418
+
8419
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8420
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8421
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8343
8422
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8344
8423
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8345
8424
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8796,6 +8875,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8796
8875
  type = lex_optional_float_suffix(parser, seen_e);
8797
8876
  }
8798
8877
 
8878
+ // At this point we have a completed number, but we want to provide the user
8879
+ // with a good experience if they put an additional .xxx fractional
8880
+ // component on the end, so we'll check for that here.
8881
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8882
+ const uint8_t *fraction_start = parser->current.end;
8883
+ const uint8_t *fraction_end = parser->current.end + 2;
8884
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8885
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8886
+ }
8887
+
8799
8888
  return type;
8800
8889
  }
8801
8890
 
@@ -8925,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
8925
9014
  // If we get here, then we have a $ followed by something that
8926
9015
  // isn't recognized as a global variable.
8927
9016
  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8928
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8929
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9017
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9018
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
8930
9019
  }
8931
9020
 
8932
9021
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9297,12 +9386,20 @@ escape_hexadecimal_digit(const uint8_t value) {
9297
9386
  * validated.
9298
9387
  */
9299
9388
  static inline uint32_t
9300
- escape_unicode(const uint8_t *string, size_t length) {
9389
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9301
9390
  uint32_t value = 0;
9302
9391
  for (size_t index = 0; index < length; index++) {
9303
9392
  if (index != 0) value <<= 4;
9304
9393
  value |= escape_hexadecimal_digit(string[index]);
9305
9394
  }
9395
+
9396
+ // Here we're going to verify that the value is not a surrogate code point
9397
+ // (0xD800 through 0xDFFF). If it is, add an error and return 0xFFFD instead.
9398
+ if (value >= 0xD800 && value <= 0xDFFF) {
9399
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9400
+ return 0xFFFD;
9401
+ }
9402
+
9306
9403
  return value;
9307
9404
  }
9308
9405
 
@@ -9311,7 +9408,7 @@ escape_unicode(const uint8_t *string, size_t length) {
9311
9408
  */
9312
9409
  static inline uint8_t
9313
9410
  escape_byte(uint8_t value, const uint8_t flags) {
9314
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
9411
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9315
9412
  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9316
9413
  return value;
9317
9414
  }
@@ -9411,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9411
9508
  static inline void
9412
9509
  escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9413
9510
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9414
- pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
9415
-
9416
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
9417
- uint8_t byte2 = (uint8_t) (byte & 0xF);
9418
-
9419
- if (byte1 >= 0xA) {
9420
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
9421
- } else {
9422
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
9423
- }
9424
-
9425
- if (byte2 >= 0xA) {
9426
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
9427
- } else {
9428
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
9429
- }
9511
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9430
9512
  }
9431
9513
 
9432
9514
  escape_write_byte_encoded(parser, buffer, byte);
@@ -9461,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9461
9543
  switch (peek(parser)) {
9462
9544
  case '\\': {
9463
9545
  parser->current.end++;
9464
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
9546
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9465
9547
  return;
9466
9548
  }
9467
9549
  case '\'': {
9468
9550
  parser->current.end++;
9469
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
9551
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9470
9552
  return;
9471
9553
  }
9472
9554
  case 'a': {
9473
9555
  parser->current.end++;
9474
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
9556
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9475
9557
  return;
9476
9558
  }
9477
9559
  case 'b': {
9478
9560
  parser->current.end++;
9479
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
9561
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9480
9562
  return;
9481
9563
  }
9482
9564
  case 'e': {
9483
9565
  parser->current.end++;
9484
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
9566
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9485
9567
  return;
9486
9568
  }
9487
9569
  case 'f': {
9488
9570
  parser->current.end++;
9489
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
9571
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9490
9572
  return;
9491
9573
  }
9492
9574
  case 'n': {
9493
9575
  parser->current.end++;
9494
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9576
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9495
9577
  return;
9496
9578
  }
9497
9579
  case 'r': {
9498
9580
  parser->current.end++;
9499
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
9581
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9500
9582
  return;
9501
9583
  }
9502
9584
  case 's': {
9503
9585
  parser->current.end++;
9504
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
9586
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9505
9587
  return;
9506
9588
  }
9507
9589
  case 't': {
9508
9590
  parser->current.end++;
9509
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
9591
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9510
9592
  return;
9511
9593
  }
9512
9594
  case 'v': {
9513
9595
  parser->current.end++;
9514
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
9596
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9515
9597
  return;
9516
9598
  }
9517
9599
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -9528,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9528
9610
  }
9529
9611
  }
9530
9612
 
9531
- escape_write_byte_encoded(parser, buffer, value);
9613
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9532
9614
  return;
9533
9615
  }
9534
9616
  case 'x': {
@@ -9547,8 +9629,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9547
9629
  parser->current.end++;
9548
9630
  }
9549
9631
 
9632
+ value = escape_byte(value, flags);
9550
9633
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9551
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9634
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9635
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9636
+ } else {
9637
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9638
+ }
9552
9639
  }
9553
9640
 
9554
9641
  escape_write_byte_encoded(parser, buffer, value);
@@ -9580,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9580
9667
  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9581
9668
  } else if (hexadecimal_length == 0) {
9582
9669
  // there are no hexadecimal characters
9583
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
9670
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9671
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9584
9672
  return;
9585
9673
  }
9586
9674
 
@@ -9590,7 +9678,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9590
9678
  extra_codepoints_start = unicode_start;
9591
9679
  }
9592
9680
 
9593
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9681
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9594
9682
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9595
9683
 
9596
9684
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9615,7 +9703,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9615
9703
  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
9704
 
9617
9705
  if (length == 4) {
9618
- uint32_t value = escape_unicode(parser->current.end, 4);
9706
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9619
9707
 
9620
9708
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
9709
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9651,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9651
9739
  return;
9652
9740
  }
9653
9741
  parser->current.end++;
9742
+
9743
+ if (match(parser, 'u') || match(parser, 'U')) {
9744
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9745
+ return;
9746
+ }
9747
+
9654
9748
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9655
9749
  return;
9656
9750
  case ' ':
@@ -9678,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9678
9772
  case 'C': {
9679
9773
  parser->current.end++;
9680
9774
  if (peek(parser) != '-') {
9681
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9775
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9776
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9682
9777
  return;
9683
9778
  }
9684
9779
 
@@ -9701,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9701
9796
  return;
9702
9797
  }
9703
9798
  parser->current.end++;
9799
+
9800
+ if (match(parser, 'u') || match(parser, 'U')) {
9801
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9802
+ return;
9803
+ }
9804
+
9704
9805
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9705
9806
  return;
9706
9807
  case ' ':
@@ -9715,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9715
9816
  return;
9716
9817
  default: {
9717
9818
  if (!char_is_ascii_printable(peeked)) {
9718
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9819
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9820
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9719
9821
  return;
9720
9822
  }
9721
9823
 
@@ -9728,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9728
9830
  case 'M': {
9729
9831
  parser->current.end++;
9730
9832
  if (peek(parser) != '-') {
9731
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9833
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9834
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9732
9835
  return;
9733
9836
  }
9734
9837
 
@@ -9746,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9746
9849
  return;
9747
9850
  }
9748
9851
  parser->current.end++;
9852
+
9853
+ if (match(parser, 'u') || match(parser, 'U')) {
9854
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9855
+ return;
9856
+ }
9857
+
9749
9858
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9750
9859
  return;
9751
9860
  case ' ':
@@ -9760,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9760
9869
  return;
9761
9870
  default:
9762
9871
  if (!char_is_ascii_printable(peeked)) {
9763
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9872
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9873
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9764
9874
  return;
9765
9875
  }
9766
9876
 
@@ -10721,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
10721
10831
  type = PM_TOKEN_USTAR_STAR;
10722
10832
  } else if (lex_state_beg_p(parser)) {
10723
10833
  type = PM_TOKEN_USTAR_STAR;
10834
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10835
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10724
10836
  }
10725
10837
 
10726
10838
  if (lex_state_operator_p(parser)) {
@@ -10744,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
10744
10856
  type = PM_TOKEN_USTAR;
10745
10857
  } else if (lex_state_beg_p(parser)) {
10746
10858
  type = PM_TOKEN_USTAR;
10859
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10860
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10747
10861
  }
10748
10862
 
10749
10863
  if (lex_state_operator_p(parser)) {
@@ -10860,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
10860
10974
  // If we have quotes, then we're going to go until we find the
10861
10975
  // end quote.
10862
10976
  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10977
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10863
10978
  parser->current.end++;
10864
10979
  }
10865
10980
  }
@@ -10917,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
10917
11032
  LEX(PM_TOKEN_LESS_LESS_EQUAL);
10918
11033
  }
10919
11034
 
11035
+ if (ambiguous_operator_p(parser, space_seen)) {
11036
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11037
+ }
11038
+
10920
11039
  if (lex_state_operator_p(parser)) {
10921
11040
  lex_state_set(parser, PM_LEX_STATE_ARG);
10922
11041
  } else {
@@ -11030,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
11030
11149
  type = PM_TOKEN_UAMPERSAND;
11031
11150
  } else if (lex_state_beg_p(parser)) {
11032
11151
  type = PM_TOKEN_UAMPERSAND;
11152
+ } else if (ambiguous_operator_p(parser, space_seen)) {
11153
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11033
11154
  }
11034
11155
 
11035
11156
  if (lex_state_operator_p(parser)) {
@@ -11104,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
11104
11225
  LEX(PM_TOKEN_UPLUS);
11105
11226
  }
11106
11227
 
11228
+ if (ambiguous_operator_p(parser, space_seen)) {
11229
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11230
+ }
11231
+
11107
11232
  lex_state_set(parser, PM_LEX_STATE_BEG);
11108
11233
  LEX(PM_TOKEN_PLUS);
11109
11234
  }
@@ -11141,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
11141
11266
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11142
11267
  }
11143
11268
 
11269
+ if (ambiguous_operator_p(parser, space_seen)) {
11270
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11271
+ }
11272
+
11144
11273
  lex_state_set(parser, PM_LEX_STATE_BEG);
11145
11274
  LEX(PM_TOKEN_MINUS);
11146
11275
  }
@@ -11239,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
11239
11368
  LEX(PM_TOKEN_REGEXP_BEGIN);
11240
11369
  }
11241
11370
 
11371
+ if (ambiguous_operator_p(parser, space_seen)) {
11372
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11373
+ }
11374
+
11242
11375
  if (lex_state_operator_p(parser)) {
11243
11376
  lex_state_set(parser, PM_LEX_STATE_ARG);
11244
11377
  } else {
@@ -11274,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
11274
11407
  // operator because we don't want to move into the string
11275
11408
  // lex mode unnecessarily.
11276
11409
  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11277
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11410
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11278
11411
  LEX(PM_TOKEN_PERCENT);
11279
11412
  }
11280
11413
 
@@ -11293,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
11293
11426
 
11294
11427
  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11295
11428
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11296
-
11297
- if (parser->current.end < parser->end) {
11298
- LEX(PM_TOKEN_STRING_BEGIN);
11299
- }
11429
+ LEX(PM_TOKEN_STRING_BEGIN);
11300
11430
  }
11301
11431
 
11302
11432
  // Delimiters for %-literals cannot be alphanumeric. We
@@ -11423,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
11423
11553
  }
11424
11554
  }
11425
11555
 
11556
+ if (ambiguous_operator_p(parser, space_seen)) {
11557
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11558
+ }
11559
+
11426
11560
  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11427
11561
  LEX(PM_TOKEN_PERCENT);
11428
11562
  }
@@ -12233,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
12233
12367
 
12234
12368
  // If we are immediately following a newline and we have hit the
12235
12369
  // terminator, then we need to return the ending of the heredoc.
12236
- if (!line_continuation && current_token_starts_line(parser)) {
12370
+ if (current_token_starts_line(parser)) {
12237
12371
  const uint8_t *start = parser->current.start;
12238
- if (start + ident_length <= parser->end) {
12372
+
12373
+ if (!line_continuation && (start + ident_length <= parser->end)) {
12239
12374
  const uint8_t *newline = next_newline(start, parser->end - start);
12240
12375
  const uint8_t *ident_end = newline;
12241
12376
  const uint8_t *terminator_end = newline;
@@ -12391,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
12391
12526
  }
12392
12527
 
12393
12528
  parser->current.end = breakpoint + 1;
12394
-
12395
- if (!was_line_continuation) {
12396
- pm_token_buffer_flush(parser, &token_buffer);
12397
- LEX(PM_TOKEN_STRING_CONTENT);
12398
- }
12529
+ pm_token_buffer_flush(parser, &token_buffer);
12530
+ LEX(PM_TOKEN_STRING_CONTENT);
12399
12531
  }
12400
12532
 
12401
12533
  // Otherwise we hit a newline and it wasn't followed by
@@ -13030,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13030
13162
  return (pm_node_t *) result;
13031
13163
  }
13032
13164
 
13165
+ /**
13166
+ * When an implicit local variable is written to or targeted, it becomes a
13167
+ * regular, named local variable. This function removes the given node from the
13168
+ * current scope's list of implicit parameters (if present) when that happens.
13169
+ */
13170
+ static void
13171
+ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13172
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13173
+
13174
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
13175
+ if (implicit_parameters->nodes[index] == node) {
13176
+ // If the node is not the last one in the list, shift the remaining
13177
+ // nodes down to fill the gap. Source and destination overlap, so this
13178
+ // must be memmove, not memcpy. This is extremely unlikely to happen.
13179
+ if (index != implicit_parameters->size - 1) {
13180
+ memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13181
+ }
13182
+
13183
+ implicit_parameters->size--;
13184
+ break;
13185
+ }
13186
+ }
13187
+ }
13188
+
13033
13189
  /**
13034
13190
  * Convert the given node into a valid target node.
13191
+ *
13192
+ * @param multiple Whether or not this target is part of a larger set of
13193
+ * targets. If it is, then the &. operator is not allowed.
13194
+ * @param splat_parent Whether or not this target is a child of a splat
13195
+ * target. If it is, then fewer patterns are allowed.
13035
13196
  */
13036
13197
  static pm_node_t *
13037
- parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13198
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13038
13199
  switch (PM_NODE_TYPE(target)) {
13039
13200
  case PM_MISSING_NODE:
13040
13201
  return target;
@@ -13080,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13080
13241
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13081
13242
  return target;
13082
13243
  case PM_LOCAL_VARIABLE_READ_NODE: {
13083
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13244
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13245
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13246
+ parse_target_implicit_parameter(parser, target);
13247
+ }
13084
13248
 
13085
13249
  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13086
13250
  uint32_t name = cast->name;
@@ -13092,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13092
13256
 
13093
13257
  return target;
13094
13258
  }
13259
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13260
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13261
+ pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13262
+
13263
+ parse_target_implicit_parameter(parser, target);
13264
+ pm_node_destroy(parser, target);
13265
+
13266
+ return node;
13267
+ }
13095
13268
  case PM_INSTANCE_VARIABLE_READ_NODE:
13096
13269
  assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
13097
13270
  target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13098
13271
  return target;
13099
13272
  case PM_MULTI_TARGET_NODE:
13273
+ if (splat_parent) {
13274
+ // Multi target is not accepted in all positions. If this is one
13275
+ // of them, then we need to add an error.
13276
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13277
+ }
13278
+
13100
13279
  return target;
13101
13280
  case PM_SPLAT_NODE: {
13102
13281
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13103
13282
 
13104
13283
  if (splat->expression != NULL) {
13105
- splat->expression = parse_target(parser, splat->expression, multiple);
13284
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
13106
13285
  }
13107
13286
 
13108
13287
  return (pm_node_t *) splat;
@@ -13172,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13172
13351
  */
13173
13352
  static pm_node_t *
13174
13353
  parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13175
- pm_node_t *result = parse_target(parser, target, multiple);
13354
+ pm_node_t *result = parse_target(parser, target, multiple, false);
13176
13355
 
13177
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
13356
+ // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13357
+ // parens after the targets.
13178
13358
  if (
13179
13359
  !match1(parser, PM_TOKEN_EQUAL) &&
13180
13360
  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -13244,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13244
13424
  return (pm_node_t *) node;
13245
13425
  }
13246
13426
  case PM_LOCAL_VARIABLE_READ_NODE: {
13247
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13248
13427
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
13249
13428
 
13250
13429
  pm_constant_id_t name = local_read->name;
13430
+ pm_location_t name_loc = target->location;
13431
+
13251
13432
  uint32_t depth = local_read->depth;
13252
- pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13433
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13253
13434
 
13254
- pm_location_t name_loc = target->location;
13435
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13436
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13437
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13438
+ parse_target_implicit_parameter(parser, target);
13439
+ }
13440
+
13441
+ pm_locals_unread(&scope->locals, name);
13255
13442
  pm_node_destroy(parser, target);
13256
13443
 
13257
13444
  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13258
13445
  }
13446
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13447
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13448
+ pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13449
+
13450
+ parse_target_implicit_parameter(parser, target);
13451
+ pm_node_destroy(parser, target);
13452
+
13453
+ return node;
13454
+ }
13259
13455
  case PM_INSTANCE_VARIABLE_READ_NODE: {
13260
13456
  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13261
13457
  pm_node_destroy(parser, target);
@@ -13409,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13409
13605
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13410
13606
 
13411
13607
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13412
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13608
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13413
13609
 
13414
13610
  while (accept1(parser, PM_TOKEN_COMMA)) {
13415
13611
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13425,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13425
13621
 
13426
13622
  if (token_begins_expression_p(parser->current.type)) {
13427
13623
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13428
- name = parse_target(parser, name, true);
13624
+ name = parse_target(parser, name, true, true);
13429
13625
  }
13430
13626
 
13431
13627
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13433,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13433
13629
  has_rest = true;
13434
13630
  } else if (token_begins_expression_p(parser->current.type)) {
13435
13631
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13436
- target = parse_target(parser, target, true);
13632
+ target = parse_target(parser, target, true, false);
13437
13633
 
13438
13634
  pm_multi_target_node_targets_append(parser, result, target);
13439
13635
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13470,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
13470
13666
  */
13471
13667
  static pm_statements_node_t *
13472
13668
  parse_statements(pm_parser_t *parser, pm_context_t context) {
13473
- // First, skip past any optional terminators that might be at the beginning of
13474
- // the statements.
13669
+ // First, skip past any optional terminators that might be at the beginning
13670
+ // of the statements.
13475
13671
  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13476
13672
 
13477
13673
  // If we have a terminator, then we can just return NULL.
@@ -13487,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13487
13683
  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13488
13684
  pm_statements_node_body_append(parser, statements, node);
13489
13685
 
13490
- // If we're recovering from a syntax error, then we need to stop parsing the
13491
- // statements now.
13686
+ // If we're recovering from a syntax error, then we need to stop parsing
13687
+ // the statements now.
13492
13688
  if (parser->recovering) {
13493
- // If this is the level of context where the recovery has happened, then
13494
- // we can mark the parser as done recovering.
13689
+ // If this is the level of context where the recovery has happened,
13690
+ // then we can mark the parser as done recovering.
13495
13691
  if (context_terminator(context, &parser->current)) parser->recovering = false;
13496
13692
  break;
13497
13693
  }
13498
13694
 
13499
- // If we have a terminator, then we will parse all consecutive terminators
13500
- // and then continue parsing the statements list.
13695
+ // If we have a terminator, then we will parse all consecutive
13696
+ // terminators and then continue parsing the statements list.
13501
13697
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13502
- // If we have a terminator, then we will continue parsing the statements
13503
- // list.
13698
+ // If we have a terminator, then we will continue parsing the
13699
+ // statements list.
13504
13700
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13505
13701
  if (context_terminator(context, &parser->current)) break;
13506
13702
 
@@ -13508,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13508
13704
  continue;
13509
13705
  }
13510
13706
 
13511
- // At this point we have a list of statements that are not terminated by a
13512
- // newline or semicolon. At this point we need to check if we're at the end
13513
- // of the statements list. If we are, then we should break out of the loop.
13707
+ // At this point we have a list of statements that are not terminated by
13708
+ // a newline or semicolon. At this point we need to check if we're at
13709
+ // the end of the statements list. If we are, then we should break out
13710
+ // of the loop.
13514
13711
  if (context_terminator(context, &parser->current)) break;
13515
13712
 
13516
13713
  // At this point, we have a syntax error, because the statement was not
13517
13714
  // terminated by a newline or semicolon, and we're not at the end of the
13518
- // statements list. Ideally we should scan forward to determine if we should
13519
- // insert a missing terminator or break out of parsing the statements list
13520
- // at this point.
13715
+ // statements list. Ideally we should scan forward to determine if we
13716
+ // should insert a missing terminator or break out of parsing the
13717
+ // statements list at this point.
13521
13718
  //
13522
- // We don't have that yet, so instead we'll do a more naive approach. If we
13523
- // were unable to parse an expression, then we will skip past this token and
13524
- // continue parsing the statements list. Otherwise we'll add an error and
13525
- // continue parsing the statements list.
13719
+ // We don't have that yet, so instead we'll do a more naive approach. If
13720
+ // we were unable to parse an expression, then we will skip past this
13721
+ // token and continue parsing the statements list. Otherwise we'll add
13722
+ // an error and continue parsing the statements list.
13526
13723
  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13527
13724
  parser_lex(parser);
13528
13725
 
13529
13726
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13530
13727
  if (context_terminator(context, &parser->current)) break;
13531
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
13728
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13532
13729
  // This is an inlined version of accept1 because the error that we
13533
13730
  // want to add has varargs. If this happens again, we should
13534
13731
  // probably extract a helper function.
@@ -13550,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13550
13747
  */
13551
13748
  static void
13552
13749
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13553
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13750
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13554
13751
 
13555
13752
  if (duplicated != NULL) {
13556
13753
  pm_buffer_t buffer = { 0 };
@@ -13576,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13576
13773
  */
13577
13774
  static void
13578
13775
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13579
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13776
+ pm_node_t *previous;
13777
+
13778
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13580
13779
  pm_diagnostic_list_append_format(
13581
13780
  &parser->warning_list,
13582
13781
  node->location.start,
13583
13782
  node->location.end,
13584
13783
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13585
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13784
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13785
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13586
13786
  );
13587
13787
  }
13588
13788
  }
@@ -14036,31 +14236,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14036
14236
  * Check if current parameter follows valid parameters ordering. If not it adds
14037
14237
  * an error to the list without stopping the parsing, otherwise sets the
14038
14238
  * parameters state to the one corresponding to the current parameter.
14239
+ *
14240
+ * It returns true if it was successful, and false otherwise.
14039
14241
  */
14040
- static void
14242
+ static bool
14041
14243
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14042
14244
  pm_parameters_order_t state = parameters_ordering[token->type];
14043
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14245
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
14044
14246
 
14045
14247
  // If we see another ordered argument after a optional argument
14046
14248
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14047
14249
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14048
14250
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14049
- return;
14251
+ return true;
14050
14252
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14051
- return;
14253
+ return true;
14052
14254
  }
14053
14255
 
14054
14256
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14055
14257
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14056
- }
14057
-
14058
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14258
+ return false;
14259
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14260
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14261
+ return false;
14262
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14059
14263
  // We know what transition we failed on, so we can provide a better error here.
14060
14264
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14061
- } else if (state < *current) {
14062
- *current = state;
14265
+ return false;
14063
14266
  }
14267
+
14268
+ if (state < *current) *current = state;
14269
+ return true;
14064
14270
  }
14065
14271
 
14066
14272
  /**
@@ -14129,27 +14335,22 @@ parse_parameters(
14129
14335
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14130
14336
  }
14131
14337
 
14132
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14133
- update_parameter_state(parser, &parser->current, &order);
14134
- parser_lex(parser);
14338
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14339
+ parser_lex(parser);
14135
14340
 
14136
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14341
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14342
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14137
14343
 
14138
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14139
- if (params->keyword_rest != NULL) {
14140
- // If we already have a keyword rest parameter, then we replace it with the
14141
- // forwarding parameter and move the keyword rest parameter to the posts list.
14142
- pm_node_t *keyword_rest = params->keyword_rest;
14143
- pm_parameters_node_posts_append(params, keyword_rest);
14144
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14145
- params->keyword_rest = NULL;
14146
- }
14147
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
14148
- } else {
14149
- update_parameter_state(parser, &parser->current, &order);
14150
- parser_lex(parser);
14344
+ if (params->keyword_rest != NULL) {
14345
+ // If we already have a keyword rest parameter, then we replace it with the
14346
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14347
+ pm_node_t *keyword_rest = params->keyword_rest;
14348
+ pm_parameters_node_posts_append(params, keyword_rest);
14349
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14350
+ params->keyword_rest = NULL;
14151
14351
  }
14152
14352
 
14353
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14153
14354
  break;
14154
14355
  }
14155
14356
  case PM_TOKEN_CLASS_VARIABLE:
@@ -14193,7 +14394,7 @@ parse_parameters(
14193
14394
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14194
14395
 
14195
14396
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14196
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14397
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14197
14398
 
14198
14399
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
14199
14400
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
@@ -14206,7 +14407,7 @@ parse_parameters(
14206
14407
  // If the value of the parameter increased the number of
14207
14408
  // reads of that parameter, then we need to warn that we
14208
14409
  // have a circular definition.
14209
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14410
+ if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14210
14411
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14211
14412
  }
14212
14413
 
@@ -14244,6 +14445,12 @@ parse_parameters(
14244
14445
  pm_token_t local = name;
14245
14446
  local.end -= 1;
14246
14447
 
14448
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14449
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14450
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14451
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14452
+ }
14453
+
14247
14454
  bool repeated = pm_parser_parameter_name_check(parser, &local);
14248
14455
  pm_parser_local_add_token(parser, &local, 1);
14249
14456
 
@@ -14279,10 +14486,10 @@ parse_parameters(
14279
14486
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14280
14487
 
14281
14488
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14282
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14489
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14283
14490
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
14284
14491
 
14285
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14492
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14286
14493
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14287
14494
  }
14288
14495
 
@@ -14454,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14454
14661
  pm_rescue_node_operator_set(rescue, &parser->previous);
14455
14662
 
14456
14663
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14457
- reference = parse_target(parser, reference, false);
14664
+ reference = parse_target(parser, reference, false, false);
14458
14665
 
14459
14666
  pm_rescue_node_reference_set(rescue, reference);
14460
14667
  break;
@@ -14484,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14484
14691
  pm_rescue_node_operator_set(rescue, &parser->previous);
14485
14692
 
14486
14693
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14487
- reference = parse_target(parser, reference, false);
14694
+ reference = parse_target(parser, reference, false, false);
14488
14695
 
14489
14696
  pm_rescue_node_reference_set(rescue, reference);
14490
14697
  break;
@@ -14689,6 +14896,28 @@ parse_block_parameters(
14689
14896
  return block_parameters;
14690
14897
  }
14691
14898
 
14899
+ /**
14900
+ * Return true if any of the visible scopes to the current context are using
14901
+ * numbered parameters.
14902
+ */
14903
+ static bool
14904
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14905
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14906
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14907
+ }
14908
+
14909
+ return false;
14910
+ }
14911
+
14912
+ /**
14913
+ * These are the names of the various numbered parameters. We have them here so
14914
+ * that when we insert them into the constant pool we can use a constant string
14915
+ * and not have to allocate.
14916
+ */
14917
+ static const char * const pm_numbered_parameter_names[] = {
14918
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
14919
+ };
14920
+
14692
14921
  /**
14693
14922
  * Return the node that should be used in the parameters field of a block-like
14694
14923
  * (block or lambda) node, depending on the kind of parameters that were
@@ -14696,31 +14925,79 @@ parse_block_parameters(
14696
14925
  */
14697
14926
  static pm_node_t *
14698
14927
  parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14699
- uint8_t masked = parser->current_scope->parameters & PM_SCOPE_PARAMETERS_TYPE_MASK;
14928
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14929
+
14930
+ // If we have ordinary parameters, then we will return them as the set of
14931
+ // parameters.
14932
+ if (parameters != NULL) {
14933
+ // If we also have implicit parameters, then this is an error.
14934
+ if (implicit_parameters->size > 0) {
14935
+ pm_node_t *node = implicit_parameters->nodes[0];
14936
+
14937
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14938
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14939
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14940
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14941
+ } else {
14942
+ assert(false && "unreachable");
14943
+ }
14944
+ }
14700
14945
 
14701
- if (masked == PM_SCOPE_PARAMETERS_NONE) {
14702
- assert(parameters == NULL);
14703
- return NULL;
14704
- } else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
14705
- assert(parameters != NULL);
14706
14946
  return parameters;
14707
- } else if (masked == PM_SCOPE_PARAMETERS_NUMBERED) {
14708
- assert(parameters == NULL);
14947
+ }
14709
14948
 
14710
- int8_t maximum = parser->current_scope->numbered_parameters;
14711
- if (maximum > 0) {
14712
- const pm_location_t location = { .start = opening->start, .end = closing->end };
14713
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, (uint8_t) maximum);
14949
+ // If we don't have any implicit parameters, then the set of parameters is
14950
+ // NULL.
14951
+ if (implicit_parameters->size == 0) {
14952
+ return NULL;
14953
+ }
14954
+
14955
+ // If we don't have ordinary parameters, then we now must validate our set
14956
+ // of implicit parameters. We can only have numbered parameters or it, but
14957
+ // they cannot be mixed.
14958
+ uint8_t numbered_parameter = 0;
14959
+ bool it_parameter = false;
14960
+
14961
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
14962
+ pm_node_t *node = implicit_parameters->nodes[index];
14963
+
14964
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14965
+ if (it_parameter) {
14966
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14967
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
14968
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14969
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14970
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14971
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14972
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14973
+ } else {
14974
+ assert(false && "unreachable");
14975
+ }
14976
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14977
+ if (numbered_parameter > 0) {
14978
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14979
+ } else {
14980
+ it_parameter = true;
14981
+ }
14714
14982
  }
14983
+ }
14715
14984
 
14716
- return NULL;
14717
- } else if (masked == PM_SCOPE_PARAMETERS_IT) {
14718
- assert(parameters == NULL);
14985
+ if (numbered_parameter > 0) {
14986
+ // Go through the parent scopes and mark them as being disallowed from
14987
+ // using numbered parameters because this inner scope is using them.
14988
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14989
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14990
+ }
14991
+
14992
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
14993
+ return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
14994
+ }
14995
+
14996
+ if (it_parameter) {
14719
14997
  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
14720
- } else {
14721
- assert(false && "unreachable");
14722
- return NULL;
14723
14998
  }
14999
+
15000
+ return NULL;
14724
15001
  }
14725
15002
 
14726
15003
  /**
@@ -14737,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
14737
15014
  pm_block_parameters_node_t *block_parameters = NULL;
14738
15015
 
14739
15016
  if (accept1(parser, PM_TOKEN_PIPE)) {
14740
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
14741
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
14742
-
14743
15017
  pm_token_t block_parameters_opening = parser->previous;
14744
15018
  if (match1(parser, PM_TOKEN_PIPE)) {
14745
15019
  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
@@ -14808,7 +15082,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14808
15082
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14809
15083
  } else {
14810
15084
  pm_accepts_block_stack_push(parser, true);
14811
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
15085
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14812
15086
 
14813
15087
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14814
15088
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14826,7 +15100,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14826
15100
  // If we get here, then the subsequent token cannot be used as an infix
14827
15101
  // operator. In this case we assume the subsequent token is part of an
14828
15102
  // argument to this method call.
14829
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
15103
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14830
15104
 
14831
15105
  // If we have done with the arguments and still not consumed the comma,
14832
15106
  // then we have a trailing comma where we need to check whether it is
@@ -14857,11 +15131,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14857
15131
  if (arguments->block == NULL && !arguments->has_forwarding) {
14858
15132
  arguments->block = (pm_node_t *) block;
14859
15133
  } else {
14860
- if (arguments->has_forwarding) {
14861
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14862
- } else {
14863
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14864
- }
15134
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15135
+
14865
15136
  if (arguments->block != NULL) {
14866
15137
  if (arguments->arguments == NULL) {
14867
15138
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15240,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
15240
15511
  #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15241
15512
  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15242
15513
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15243
- case PM_NUMBERED_REFERENCE_READ_NODE
15514
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15244
15515
 
15245
15516
  // Assert here that the flags are the same so that we can safely switch the type
15246
15517
  // of the node without having to move the flags.
@@ -15298,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
15298
15569
  // "aaa #{bbb} #@ccc ddd"
15299
15570
  // ^^^^^^
15300
15571
  case PM_TOKEN_EMBEXPR_BEGIN: {
15572
+ // Ruby disallows seeing encoding around interpolation in strings,
15573
+ // even though it is known at parse time.
15574
+ parser->explicit_encoding = NULL;
15575
+
15301
15576
  pm_lex_state_t state = parser->lex_state;
15302
15577
  int brace_nesting = parser->brace_nesting;
15303
15578
 
@@ -15320,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
15320
15595
  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15321
15596
  pm_token_t closing = parser->previous;
15322
15597
 
15598
+ // If this set of embedded statements only contains a single
15599
+ // statement, then Ruby does not consider it as a possible statement
15600
+ // that could emit a line event.
15601
+ if (statements != NULL && statements->body.size == 1) {
15602
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15603
+ }
15604
+
15323
15605
  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
15324
15606
  }
15325
15607
 
@@ -15330,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
15330
15612
  // "aaa #{bbb} #@ccc ddd"
15331
15613
  // ^^^^^
15332
15614
  case PM_TOKEN_EMBVAR: {
15615
+ // Ruby disallows seeing encoding around interpolation in strings,
15616
+ // even though it is known at parse time.
15617
+ parser->explicit_encoding = NULL;
15618
+
15333
15619
  lex_state_set(parser, PM_LEX_STATE_BEG);
15334
15620
  parser_lex(parser);
15335
15621
 
@@ -15644,75 +15930,44 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15644
15930
  }
15645
15931
  }
15646
15932
 
15647
- /**
15648
- * Return true if any of the visible scopes to the current context are using
15649
- * numbered parameters.
15650
- */
15651
- static bool
15652
- outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15653
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15654
- if (scope->numbered_parameters > 0) return true;
15655
- }
15656
-
15657
- return false;
15658
- }
15659
-
15660
- /**
15661
- * These are the names of the various numbered parameters. We have them here so
15662
- * that when we insert them into the constant pool we can use a constant string
15663
- * and not have to allocate.
15664
- */
15665
- static const char * const pm_numbered_parameter_names[] = {
15666
- "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
15667
- };
15668
-
15669
15933
  /**
15670
15934
  * Parse an identifier into either a local variable read. If the local variable
15671
15935
  * is not found, it returns NULL instead.
15672
15936
  */
15673
- static pm_local_variable_read_node_t *
15937
+ static pm_node_t *
15674
15938
  parse_variable(pm_parser_t *parser) {
15939
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15675
15940
  int depth;
15676
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
15677
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
15941
+
15942
+ if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
15943
+ return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
15678
15944
  }
15679
15945
 
15680
15946
  pm_scope_t *current_scope = parser->current_scope;
15681
- if (!current_scope->closed && current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15682
- // Now that we know we have a numbered parameter, we need to check
15683
- // if it's allowed in this context. If it is, then we will create a
15684
- // local variable read. If it's not, then we'll create a normal call
15685
- // node but add an error.
15686
- if (current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
15687
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15688
- } else if (current_scope->parameters & PM_SCOPE_PARAMETERS_IT) {
15689
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_IT);
15690
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
15691
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
15692
- } else {
15693
- // Indicate that this scope is using numbered params so that child
15694
- // scopes cannot. We subtract the value for the character '0' to get
15695
- // the actual integer value of the number (only _1 through _9 are
15696
- // valid).
15697
- int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
15698
- current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
15699
-
15700
- if (numbered_parameters > current_scope->numbered_parameters) {
15701
- current_scope->numbered_parameters = numbered_parameters;
15947
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15948
+ if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15949
+ // When you use a numbered parameter, it implies the existence of
15950
+ // all of the locals that exist before it. For example, referencing
15951
+ // _2 means that _1 must exist. Therefore here we loop through all
15952
+ // of the possibilities and add them into the constant pool.
15953
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15954
+ for (uint8_t number = 1; number <= maximum; number++) {
15955
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15702
15956
  }
15703
15957
 
15704
- // When you use a numbered parameter, it implies the existence
15705
- // of all of the locals that exist before it. For example,
15706
- // referencing _2 means that _1 must exist. Therefore here we
15707
- // loop through all of the possibilities and add them into the
15708
- // constant pool.
15709
- for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
15710
- pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
15958
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
15959
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15711
15960
  }
15712
15961
 
15713
- // Finally we can create the local variable read node.
15714
- pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
15715
- return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15962
+ pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15963
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15964
+
15965
+ return node;
15966
+ } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15967
+ pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
15968
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15969
+
15970
+ return node;
15716
15971
  }
15717
15972
  }
15718
15973
 
@@ -15727,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
15727
15982
  pm_node_flags_t flags = 0;
15728
15983
 
15729
15984
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15730
- pm_local_variable_read_node_t *node = parse_variable(parser);
15731
- if (node != NULL) return (pm_node_t *) node;
15985
+ pm_node_t *node = parse_variable(parser);
15986
+ if (node != NULL) return node;
15732
15987
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15733
15988
  }
15734
15989
 
@@ -15846,127 +16101,355 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15846
16101
  nodes->size = write_index;
15847
16102
  }
15848
16103
 
15849
- static pm_node_t *
15850
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15851
-
15852
16104
  /**
15853
- * Add the newly created local to the list of captures for this pattern matching
15854
- * expression. If it is duplicated from a previous local, then we'll need to add
15855
- * an error to the parser.
16105
+ * Return a string content token at a particular location that is empty.
15856
16106
  */
15857
- static void
15858
- parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
15859
- // Skip this capture if it starts with an underscore.
15860
- if (*location->start == '_') return;
15861
-
15862
- if (pm_constant_id_list_includes(captures, capture)) {
15863
- pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
15864
- } else {
15865
- pm_constant_id_list_append(captures, capture);
15866
- }
16107
+ static pm_token_t
16108
+ parse_strings_empty_content(const uint8_t *location) {
16109
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15867
16110
  }
15868
16111
 
15869
16112
  /**
15870
- * Accept any number of constants joined by :: delimiters.
16113
+ * Parse a set of strings that could be concatenated together.
15871
16114
  */
15872
- static pm_node_t *
15873
- parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
15874
- // Now, if there are any :: operators that follow, parse them as constant
15875
- // path nodes.
15876
- while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15877
- pm_token_t delimiter = parser->previous;
15878
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15879
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15880
- }
15881
-
15882
- // If there is a [ or ( that follows, then this is part of a larger pattern
15883
- // expression. We'll parse the inner pattern here, then modify the returned
15884
- // inner pattern with our constant path attached.
15885
- if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
15886
- return node;
15887
- }
16115
+ static inline pm_node_t *
16116
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16117
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
15888
16118
 
15889
- pm_token_t opening;
15890
- pm_token_t closing;
15891
- pm_node_t *inner = NULL;
16119
+ bool concating = false;
16120
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
15892
16121
 
15893
- if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
15894
- opening = parser->previous;
15895
- accept1(parser, PM_TOKEN_NEWLINE);
16122
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16123
+ pm_node_t *node = NULL;
15896
16124
 
15897
- if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15898
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15899
- accept1(parser, PM_TOKEN_NEWLINE);
15900
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15901
- }
16125
+ // Here we have found a string literal. We'll parse it and add it to
16126
+ // the list of strings.
16127
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16128
+ assert(lex_mode->mode == PM_LEX_STRING);
16129
+ bool lex_interpolation = lex_mode->as.string.interpolation;
15902
16130
 
15903
- closing = parser->previous;
15904
- } else {
16131
+ pm_token_t opening = parser->current;
15905
16132
  parser_lex(parser);
15906
- opening = parser->previous;
15907
- accept1(parser, PM_TOKEN_NEWLINE);
15908
-
15909
- if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15910
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15911
- accept1(parser, PM_TOKEN_NEWLINE);
15912
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15913
- }
15914
-
15915
- closing = parser->previous;
15916
- }
15917
-
15918
- if (!inner) {
15919
- // If there was no inner pattern, then we have something like Foo() or
15920
- // Foo[]. In that case we'll create an array pattern with no requireds.
15921
- return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
15922
- }
15923
16133
 
15924
- // Now that we have the inner pattern, check to see if it's an array, find,
15925
- // or hash pattern. If it is, then we'll attach our constant path to it if
15926
- // it doesn't already have a constant. If it's not one of those node types
15927
- // or it does have a constant, then we'll create an array pattern.
15928
- switch (PM_NODE_TYPE(inner)) {
15929
- case PM_ARRAY_PATTERN_NODE: {
15930
- pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16134
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16135
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16136
+ // If we get here, then we have an end immediately after a
16137
+ // start. In that case we'll create an empty content token and
16138
+ // return an uninterpolated string.
16139
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16140
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15931
16141
 
15932
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15933
- pattern_node->base.location.start = node->location.start;
15934
- pattern_node->base.location.end = closing.end;
16142
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
16143
+ node = (pm_node_t *) string;
16144
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16145
+ // If we get here, then we have an end of a label immediately
16146
+ // after a start. In that case we'll create an empty symbol
16147
+ // node.
16148
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16149
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
15935
16150
 
15936
- pattern_node->constant = node;
15937
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15938
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16151
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16152
+ node = (pm_node_t *) symbol;
16153
+ } else if (!lex_interpolation) {
16154
+ // If we don't accept interpolation then we expect the string to
16155
+ // start with a single string content node.
16156
+ pm_string_t unescaped;
16157
+ pm_token_t content;
15939
16158
 
15940
- return (pm_node_t *) pattern_node;
16159
+ if (match1(parser, PM_TOKEN_EOF)) {
16160
+ unescaped = PM_STRING_EMPTY;
16161
+ content = not_provided(parser);
16162
+ } else {
16163
+ unescaped = parser->current_string;
16164
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16165
+ content = parser->previous;
15941
16166
  }
15942
16167
 
15943
- break;
15944
- }
15945
- case PM_FIND_PATTERN_NODE: {
15946
- pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
15947
-
15948
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15949
- pattern_node->base.location.start = node->location.start;
15950
- pattern_node->base.location.end = closing.end;
15951
-
15952
- pattern_node->constant = node;
15953
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15954
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16168
+ // It is unfortunately possible to have multiple string content
16169
+ // nodes in a row in the case that there's heredoc content in
16170
+ // the middle of the string, like this cursed example:
16171
+ //
16172
+ // <<-END+'b
16173
+ // a
16174
+ // END
16175
+ // c'+'d'
16176
+ //
16177
+ // In that case we need to switch to an interpolated string to
16178
+ // be able to contain all of the parts.
16179
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16180
+ pm_node_list_t parts = { 0 };
15955
16181
 
15956
- return (pm_node_t *) pattern_node;
15957
- }
16182
+ pm_token_t delimiters = not_provided(parser);
16183
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16184
+ pm_node_list_append(&parts, part);
15958
16185
 
15959
- break;
15960
- }
15961
- case PM_HASH_PATTERN_NODE: {
15962
- pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16186
+ do {
16187
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16188
+ pm_node_list_append(&parts, part);
16189
+ parser_lex(parser);
16190
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
15963
16191
 
15964
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
15965
- pattern_node->base.location.start = node->location.start;
15966
- pattern_node->base.location.end = closing.end;
16192
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16193
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
15967
16194
 
15968
- pattern_node->constant = node;
15969
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16195
+ pm_node_list_free(&parts);
16196
+ } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16197
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16198
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16199
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16200
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16201
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16202
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16203
+ } else {
16204
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16205
+ parser->previous.start = parser->previous.end;
16206
+ parser->previous.type = PM_TOKEN_MISSING;
16207
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16208
+ }
16209
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16210
+ // In this case we've hit string content so we know the string
16211
+ // at least has something in it. We'll need to check if the
16212
+ // following token is the end (in which case we can return a
16213
+ // plain string) or if it's not then it has interpolation.
16214
+ pm_token_t content = parser->current;
16215
+ pm_string_t unescaped = parser->current_string;
16216
+ parser_lex(parser);
16217
+
16218
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16219
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16220
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
16221
+
16222
+ // Kind of odd behavior, but basically if we have an
16223
+ // unterminated string and it ends in a newline, we back up one
16224
+ // character so that the error message is on the last line of
16225
+ // content in the string.
16226
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
16227
+ const uint8_t *location = parser->previous.end;
16228
+ if (location > parser->start && location[-1] == '\n') location--;
16229
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16230
+
16231
+ parser->previous.start = parser->previous.end;
16232
+ parser->previous.type = PM_TOKEN_MISSING;
16233
+ }
16234
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16235
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16236
+ } else {
16237
+ // If we get here, then we have interpolation so we'll need
16238
+ // to create a string or symbol node with interpolation.
16239
+ pm_node_list_t parts = { 0 };
16240
+ pm_token_t string_opening = not_provided(parser);
16241
+ pm_token_t string_closing = not_provided(parser);
16242
+
16243
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16244
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
16245
+ pm_node_list_append(&parts, part);
16246
+
16247
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16248
+ if ((part = parse_string_part(parser)) != NULL) {
16249
+ pm_node_list_append(&parts, part);
16250
+ }
16251
+ }
16252
+
16253
+ if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16254
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16255
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16256
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16257
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16258
+ } else {
16259
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16260
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16261
+ }
16262
+
16263
+ pm_node_list_free(&parts);
16264
+ }
16265
+ } else {
16266
+ // If we get here, then the first part of the string is not plain
16267
+ // string content, in which case we need to parse the string as an
16268
+ // interpolated string.
16269
+ pm_node_list_t parts = { 0 };
16270
+ pm_node_t *part;
16271
+
16272
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16273
+ if ((part = parse_string_part(parser)) != NULL) {
16274
+ pm_node_list_append(&parts, part);
16275
+ }
16276
+ }
16277
+
16278
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
16279
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16280
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16281
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16282
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16283
+ } else {
16284
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16285
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16286
+ }
16287
+
16288
+ pm_node_list_free(&parts);
16289
+ }
16290
+
16291
+ if (current == NULL) {
16292
+ // If the node we just parsed is a symbol node, then we can't
16293
+ // concatenate it with anything else, so we can now return that
16294
+ // node.
16295
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16296
+ return node;
16297
+ }
16298
+
16299
+ // If we don't already have a node, then it's fine and we can just
16300
+ // set the result to be the node we just parsed.
16301
+ current = node;
16302
+ } else {
16303
+ // Otherwise we need to check the type of the node we just parsed.
16304
+ // If it cannot be concatenated with the previous node, then we'll
16305
+ // need to add a syntax error.
16306
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16307
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16308
+ }
16309
+
16310
+ // If we haven't already created our container for concatenation,
16311
+ // we'll do that now.
16312
+ if (!concating) {
16313
+ concating = true;
16314
+ pm_token_t bounds = not_provided(parser);
16315
+
16316
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16317
+ pm_interpolated_string_node_append(container, current);
16318
+ current = (pm_node_t *) container;
16319
+ }
16320
+
16321
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16322
+ }
16323
+ }
16324
+
16325
+ return current;
16326
+ }
16327
+
16328
+ #define PM_PARSE_PATTERN_SINGLE 0
16329
+ #define PM_PARSE_PATTERN_TOP 1
16330
+ #define PM_PARSE_PATTERN_MULTI 2
16331
+
16332
+ static pm_node_t *
16333
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
16334
+
16335
+ /**
16336
+ * Add the newly created local to the list of captures for this pattern matching
16337
+ * expression. If it is duplicated from a previous local, then we'll need to add
16338
+ * an error to the parser.
16339
+ */
16340
+ static void
16341
+ parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16342
+ // Skip this capture if it starts with an underscore.
16343
+ if (*location->start == '_') return;
16344
+
16345
+ if (pm_constant_id_list_includes(captures, capture)) {
16346
+ pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16347
+ } else {
16348
+ pm_constant_id_list_append(captures, capture);
16349
+ }
16350
+ }
16351
+
16352
+ /**
16353
+ * Accept any number of constants joined by :: delimiters.
16354
+ */
16355
+ static pm_node_t *
16356
+ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
16357
+ // Now, if there are any :: operators that follow, parse them as constant
16358
+ // path nodes.
16359
+ while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16360
+ pm_token_t delimiter = parser->previous;
16361
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16362
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16363
+ }
16364
+
16365
+ // If there is a [ or ( that follows, then this is part of a larger pattern
16366
+ // expression. We'll parse the inner pattern here, then modify the returned
16367
+ // inner pattern with our constant path attached.
16368
+ if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16369
+ return node;
16370
+ }
16371
+
16372
+ pm_token_t opening;
16373
+ pm_token_t closing;
16374
+ pm_node_t *inner = NULL;
16375
+
16376
+ if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16377
+ opening = parser->previous;
16378
+ accept1(parser, PM_TOKEN_NEWLINE);
16379
+
16380
+ if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16381
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16382
+ accept1(parser, PM_TOKEN_NEWLINE);
16383
+ expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16384
+ }
16385
+
16386
+ closing = parser->previous;
16387
+ } else {
16388
+ parser_lex(parser);
16389
+ opening = parser->previous;
16390
+ accept1(parser, PM_TOKEN_NEWLINE);
16391
+
16392
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16393
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16394
+ accept1(parser, PM_TOKEN_NEWLINE);
16395
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16396
+ }
16397
+
16398
+ closing = parser->previous;
16399
+ }
16400
+
16401
+ if (!inner) {
16402
+ // If there was no inner pattern, then we have something like Foo() or
16403
+ // Foo[]. In that case we'll create an array pattern with no requireds.
16404
+ return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16405
+ }
16406
+
16407
+ // Now that we have the inner pattern, check to see if it's an array, find,
16408
+ // or hash pattern. If it is, then we'll attach our constant path to it if
16409
+ // it doesn't already have a constant. If it's not one of those node types
16410
+ // or it does have a constant, then we'll create an array pattern.
16411
+ switch (PM_NODE_TYPE(inner)) {
16412
+ case PM_ARRAY_PATTERN_NODE: {
16413
+ pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16414
+
16415
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16416
+ pattern_node->base.location.start = node->location.start;
16417
+ pattern_node->base.location.end = closing.end;
16418
+
16419
+ pattern_node->constant = node;
16420
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16421
+ pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16422
+
16423
+ return (pm_node_t *) pattern_node;
16424
+ }
16425
+
16426
+ break;
16427
+ }
16428
+ case PM_FIND_PATTERN_NODE: {
16429
+ pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16430
+
16431
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16432
+ pattern_node->base.location.start = node->location.start;
16433
+ pattern_node->base.location.end = closing.end;
16434
+
16435
+ pattern_node->constant = node;
16436
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16437
+ pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16438
+
16439
+ return (pm_node_t *) pattern_node;
16440
+ }
16441
+
16442
+ break;
16443
+ }
16444
+ case PM_HASH_PATTERN_NODE: {
16445
+ pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16446
+
16447
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16448
+ pattern_node->base.location.start = node->location.start;
16449
+ pattern_node->base.location.end = closing.end;
16450
+
16451
+ pattern_node->constant = node;
16452
+ pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
15970
16453
  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
15971
16454
 
15972
16455
  return (pm_node_t *) pattern_node;
@@ -16055,6 +16538,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16055
16538
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16056
16539
  }
16057
16540
 
16541
+ /**
16542
+ * Check that the slice of the source given by the bounds parameters constitutes
16543
+ * a valid local variable name.
16544
+ */
16545
+ static bool
16546
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16547
+ ptrdiff_t length = end - start;
16548
+ if (length == 0) return false;
16549
+
16550
+ // First ensure that it starts with a valid identifier starting character.
16551
+ size_t width = char_is_identifier_start(parser, start);
16552
+ if (width == 0) return false;
16553
+
16554
+ // Next, ensure that it's not an uppercase character.
16555
+ if (parser->encoding_changed) {
16556
+ if (parser->encoding->isupper_char(start, length)) return false;
16557
+ } else {
16558
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16559
+ }
16560
+
16561
+ // Next, iterate through all of the bytes of the string to ensure that they
16562
+ // are all valid identifier characters.
16563
+ const uint8_t *cursor = start + width;
16564
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16565
+ return cursor == end;
16566
+ }
16567
+
16058
16568
  /**
16059
16569
  * Create an implicit node for the value of a hash pattern that has omitted the
16060
16570
  * value. This will use an implicit local variable target.
@@ -16062,14 +16572,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
16062
16572
  static pm_node_t *
16063
16573
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16064
16574
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16065
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16066
16575
 
16576
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16067
16577
  int depth = -1;
16068
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
16069
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16070
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16071
- } else {
16578
+
16579
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16072
16580
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16581
+ } else {
16582
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16583
+
16584
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16585
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16586
+ }
16073
16587
  }
16074
16588
 
16075
16589
  if (depth == -1) {
@@ -16093,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
16093
16607
  */
16094
16608
  static void
16095
16609
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16096
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
16610
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16097
16611
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16098
16612
  }
16099
16613
  }
@@ -16124,7 +16638,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16124
16638
  } else {
16125
16639
  // Here we have a value for the first assoc in the list, so
16126
16640
  // we will parse it now.
16127
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16641
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16128
16642
  }
16129
16643
 
16130
16644
  pm_token_t operator = not_provided(parser);
@@ -16139,7 +16653,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16139
16653
  // If we get anything else, then this is an error. For this we'll
16140
16654
  // create a missing node for the value and create an assoc node for
16141
16655
  // the first node in the list.
16142
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16656
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16657
+ pm_parser_err_node(parser, first_node, diag_id);
16143
16658
 
16144
16659
  pm_token_t operator = not_provided(parser);
16145
16660
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -16167,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16167
16682
  pm_node_list_append(&assocs, assoc);
16168
16683
  }
16169
16684
  } else {
16170
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16171
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16685
+ pm_node_t *key;
16686
+
16687
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16688
+ key = parse_strings(parser, NULL);
16689
+
16690
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16691
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16692
+ } else if (!pm_symbol_node_label_p(key)) {
16693
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16694
+ }
16695
+ } else {
16696
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16697
+ key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16698
+ }
16172
16699
 
16173
16700
  parse_pattern_hash_key(parser, &keys, key);
16174
16701
  pm_node_t *value = NULL;
@@ -16176,7 +16703,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16176
16703
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16177
16704
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16178
16705
  } else {
16179
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16706
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16180
16707
  }
16181
16708
 
16182
16709
  pm_token_t operator = not_provided(parser);
@@ -16233,7 +16760,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16233
16760
 
16234
16761
  // Otherwise, we'll parse the inner pattern, then deal with it depending
16235
16762
  // on the type it returns.
16236
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16763
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16237
16764
 
16238
16765
  accept1(parser, PM_TOKEN_NEWLINE);
16239
16766
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -16300,11 +16827,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16300
16827
  first_node = parse_pattern_keyword_rest(parser, captures);
16301
16828
  break;
16302
16829
  case PM_TOKEN_STRING_BEGIN:
16303
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16830
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
16304
16831
  break;
16305
16832
  default: {
16833
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16306
16834
  parser_lex(parser);
16307
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
16308
16835
 
16309
16836
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16310
16837
  break;
@@ -16380,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16380
16907
  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
16381
16908
 
16382
16909
  if (variable == NULL) {
16383
- if (
16384
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
16385
- !parser->current_scope->closed &&
16386
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
16387
- pm_token_is_it(parser->previous.start, parser->previous.end)
16388
- ) {
16389
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
16390
- if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
16391
- variable = (pm_node_t *) read;
16392
- } else {
16393
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16394
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16395
- }
16910
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16911
+ variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16396
16912
  }
16397
16913
 
16398
16914
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
@@ -16506,7 +17022,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16506
17022
  pm_token_t opening = parser->current;
16507
17023
  parser_lex(parser);
16508
17024
 
16509
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
17025
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16510
17026
  accept1(parser, PM_TOKEN_NEWLINE);
16511
17027
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16512
17028
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16565,7 +17081,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16565
17081
  * Parse a pattern matching expression.
16566
17082
  */
16567
17083
  static pm_node_t *
16568
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
17084
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16569
17085
  pm_node_t *node = NULL;
16570
17086
 
16571
17087
  bool leading_rest = false;
@@ -16575,14 +17091,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16575
17091
  case PM_TOKEN_LABEL: {
16576
17092
  parser_lex(parser);
16577
17093
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16578
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
17094
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
17095
+
17096
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
17097
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17098
+ }
17099
+
17100
+ return node;
16579
17101
  }
16580
17102
  case PM_TOKEN_USTAR_STAR: {
16581
17103
  node = parse_pattern_keyword_rest(parser, captures);
16582
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
17104
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
17105
+
17106
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
17107
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17108
+ }
17109
+
17110
+ return node;
16583
17111
  }
16584
17112
  case PM_TOKEN_USTAR: {
16585
- if (top_pattern) {
17113
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16586
17114
  parser_lex(parser);
16587
17115
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16588
17116
  leading_rest = true;
@@ -16601,7 +17129,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16601
17129
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16602
17130
  }
16603
17131
 
16604
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
17132
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16605
17133
  // If we have a comma, then we are now parsing either an array pattern or a
16606
17134
  // find pattern. We need to parse all of the patterns, put them into a big
16607
17135
  // list, and then determine which type of node we have.
@@ -16642,262 +17170,53 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16642
17170
  if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
16643
17171
  node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
16644
17172
  } else {
16645
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
16646
- }
16647
-
16648
- xfree(nodes.nodes);
16649
- } else if (leading_rest) {
16650
- // Otherwise, if we parsed a single splat pattern, then we know we have an
16651
- // array pattern, so we can go ahead and create that node.
16652
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
16653
- }
16654
-
16655
- return node;
16656
- }
16657
-
16658
- /**
16659
- * Incorporate a negative sign into a numeric node by subtracting 1 character
16660
- * from its start bounds. If it's a compound node, then we will recursively
16661
- * apply this function to its value.
16662
- */
16663
- static inline void
16664
- parse_negative_numeric(pm_node_t *node) {
16665
- switch (PM_NODE_TYPE(node)) {
16666
- case PM_INTEGER_NODE: {
16667
- pm_integer_node_t *cast = (pm_integer_node_t *) node;
16668
- cast->base.location.start--;
16669
- cast->value.negative = true;
16670
- break;
16671
- }
16672
- case PM_FLOAT_NODE: {
16673
- pm_float_node_t *cast = (pm_float_node_t *) node;
16674
- cast->base.location.start--;
16675
- cast->value = -cast->value;
16676
- break;
16677
- }
16678
- case PM_RATIONAL_NODE:
16679
- node->location.start--;
16680
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
16681
- break;
16682
- case PM_IMAGINARY_NODE:
16683
- node->location.start--;
16684
- parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
16685
- break;
16686
- default:
16687
- assert(false && "unreachable");
16688
- break;
16689
- }
16690
- }
16691
-
16692
- /**
16693
- * Return a string content token at a particular location that is empty.
16694
- */
16695
- static pm_token_t
16696
- parse_strings_empty_content(const uint8_t *location) {
16697
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16698
- }
16699
-
16700
- /**
16701
- * Parse a set of strings that could be concatenated together.
16702
- */
16703
- static inline pm_node_t *
16704
- parse_strings(pm_parser_t *parser, pm_node_t *current) {
16705
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16706
-
16707
- bool concating = false;
16708
- bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16709
-
16710
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16711
- pm_node_t *node = NULL;
16712
-
16713
- // Here we have found a string literal. We'll parse it and add it to
16714
- // the list of strings.
16715
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16716
- assert(lex_mode->mode == PM_LEX_STRING);
16717
- bool lex_interpolation = lex_mode->as.string.interpolation;
16718
-
16719
- pm_token_t opening = parser->current;
16720
- parser_lex(parser);
16721
-
16722
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16723
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16724
- // If we get here, then we have an end immediately after a
16725
- // start. In that case we'll create an empty content token and
16726
- // return an uninterpolated string.
16727
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16728
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16729
-
16730
- pm_string_shared_init(&string->unescaped, content.start, content.end);
16731
- node = (pm_node_t *) string;
16732
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16733
- // If we get here, then we have an end of a label immediately
16734
- // after a start. In that case we'll create an empty symbol
16735
- // node.
16736
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16737
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16738
-
16739
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16740
- node = (pm_node_t *) symbol;
16741
- } else if (!lex_interpolation) {
16742
- // If we don't accept interpolation then we expect the string to
16743
- // start with a single string content node.
16744
- pm_string_t unescaped;
16745
- pm_token_t content;
16746
- if (match1(parser, PM_TOKEN_EOF)) {
16747
- unescaped = PM_STRING_EMPTY;
16748
- content = not_provided(parser);
16749
- } else {
16750
- unescaped = parser->current_string;
16751
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16752
- content = parser->previous;
16753
- }
16754
-
16755
- // It is unfortunately possible to have multiple string content
16756
- // nodes in a row in the case that there's heredoc content in
16757
- // the middle of the string, like this cursed example:
16758
- //
16759
- // <<-END+'b
16760
- // a
16761
- // END
16762
- // c'+'d'
16763
- //
16764
- // In that case we need to switch to an interpolated string to
16765
- // be able to contain all of the parts.
16766
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16767
- pm_node_list_t parts = { 0 };
16768
-
16769
- pm_token_t delimiters = not_provided(parser);
16770
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16771
- pm_node_list_append(&parts, part);
16772
-
16773
- do {
16774
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16775
- pm_node_list_append(&parts, part);
16776
- parser_lex(parser);
16777
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16778
-
16779
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16780
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16781
-
16782
- pm_node_list_free(&parts);
16783
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16784
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16785
- } else if (match1(parser, PM_TOKEN_EOF)) {
16786
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16787
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16788
- } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16789
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16790
- } else {
16791
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16792
- parser->previous.start = parser->previous.end;
16793
- parser->previous.type = PM_TOKEN_MISSING;
16794
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16795
- }
16796
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16797
- // In this case we've hit string content so we know the string
16798
- // at least has something in it. We'll need to check if the
16799
- // following token is the end (in which case we can return a
16800
- // plain string) or if it's not then it has interpolation.
16801
- pm_token_t content = parser->current;
16802
- pm_string_t unescaped = parser->current_string;
16803
- parser_lex(parser);
16804
-
16805
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16806
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16807
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
16808
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16809
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16810
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16811
- } else {
16812
- // If we get here, then we have interpolation so we'll need
16813
- // to create a string or symbol node with interpolation.
16814
- pm_node_list_t parts = { 0 };
16815
- pm_token_t string_opening = not_provided(parser);
16816
- pm_token_t string_closing = not_provided(parser);
16817
-
16818
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16819
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
16820
- pm_node_list_append(&parts, part);
16821
-
16822
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16823
- if ((part = parse_string_part(parser)) != NULL) {
16824
- pm_node_list_append(&parts, part);
16825
- }
16826
- }
16827
-
16828
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16829
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16830
- } else if (match1(parser, PM_TOKEN_EOF)) {
16831
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16832
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16833
- } else {
16834
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16835
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16836
- }
16837
-
16838
- pm_node_list_free(&parts);
16839
- }
16840
- } else {
16841
- // If we get here, then the first part of the string is not plain
16842
- // string content, in which case we need to parse the string as an
16843
- // interpolated string.
16844
- pm_node_list_t parts = { 0 };
16845
- pm_node_t *part;
16846
-
16847
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16848
- if ((part = parse_string_part(parser)) != NULL) {
16849
- pm_node_list_append(&parts, part);
16850
- }
16851
- }
16852
-
16853
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
16854
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16855
- } else if (match1(parser, PM_TOKEN_EOF)) {
16856
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16857
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16858
- } else {
16859
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16860
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16861
- }
16862
-
16863
- pm_node_list_free(&parts);
16864
- }
16865
-
16866
- if (current == NULL) {
16867
- // If the node we just parsed is a symbol node, then we can't
16868
- // concatenate it with anything else, so we can now return that
16869
- // node.
16870
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16871
- return node;
16872
- }
16873
-
16874
- // If we don't already have a node, then it's fine and we can just
16875
- // set the result to be the node we just parsed.
16876
- current = node;
16877
- } else {
16878
- // Otherwise we need to check the type of the node we just parsed.
16879
- // If it cannot be concatenated with the previous node, then we'll
16880
- // need to add a syntax error.
16881
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16882
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16883
- }
17173
+ node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17174
+ }
16884
17175
 
16885
- // If we haven't already created our container for concatenation,
16886
- // we'll do that now.
16887
- if (!concating) {
16888
- concating = true;
16889
- pm_token_t bounds = not_provided(parser);
17176
+ xfree(nodes.nodes);
17177
+ } else if (leading_rest) {
17178
+ // Otherwise, if we parsed a single splat pattern, then we know we have an
17179
+ // array pattern, so we can go ahead and create that node.
17180
+ node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17181
+ }
16890
17182
 
16891
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16892
- pm_interpolated_string_node_append(container, current);
16893
- current = (pm_node_t *) container;
16894
- }
17183
+ return node;
17184
+ }
16895
17185
 
16896
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
17186
+ /**
17187
+ * Incorporate a negative sign into a numeric node by subtracting 1 character
17188
+ * from its start bounds. If it's a compound node, then we will recursively
17189
+ * apply this function to its value.
17190
+ */
17191
+ static inline void
17192
+ parse_negative_numeric(pm_node_t *node) {
17193
+ switch (PM_NODE_TYPE(node)) {
17194
+ case PM_INTEGER_NODE: {
17195
+ pm_integer_node_t *cast = (pm_integer_node_t *) node;
17196
+ cast->base.location.start--;
17197
+ cast->value.negative = true;
17198
+ break;
17199
+ }
17200
+ case PM_FLOAT_NODE: {
17201
+ pm_float_node_t *cast = (pm_float_node_t *) node;
17202
+ cast->base.location.start--;
17203
+ cast->value = -cast->value;
17204
+ break;
17205
+ }
17206
+ case PM_RATIONAL_NODE: {
17207
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
17208
+ cast->base.location.start--;
17209
+ cast->numerator.negative = true;
17210
+ break;
16897
17211
  }
17212
+ case PM_IMAGINARY_NODE:
17213
+ node->location.start--;
17214
+ parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17215
+ break;
17216
+ default:
17217
+ assert(false && "unreachable");
17218
+ break;
16898
17219
  }
16899
-
16900
- return current;
16901
17220
  }
16902
17221
 
16903
17222
  /**
@@ -16912,6 +17231,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16912
17231
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16913
17232
  break;
16914
17233
  }
17234
+ case PM_ERR_HASH_VALUE:
17235
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17236
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17237
+ break;
17238
+ }
16915
17239
  case PM_ERR_UNARY_RECEIVER: {
16916
17240
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16917
17241
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -17090,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17090
17414
  }
17091
17415
  }
17092
17416
 
17417
+ /**
17418
+ * This struct is used to pass information between the regular expression parser
17419
+ * and the error callback.
17420
+ */
17421
+ typedef struct {
17422
+ /** The parser that we are parsing the regular expression for. */
17423
+ pm_parser_t *parser;
17424
+
17425
+ /** The start of the regular expression. */
17426
+ const uint8_t *start;
17427
+
17428
+ /** The end of the regular expression. */
17429
+ const uint8_t *end;
17430
+
17431
+ /**
17432
+ * Whether or not the source of the regular expression is shared. This
17433
+ * impacts the location of error messages, because if it is shared then we
17434
+ * can use the location directly and if it is not, then we use the bounds of
17435
+ * the regular expression itself.
17436
+ */
17437
+ bool shared;
17438
+ } parse_regular_expression_error_data_t;
17439
+
17440
+ /**
17441
+ * This callback is called when the regular expression parser encounters a
17442
+ * syntax error.
17443
+ */
17444
+ static void
17445
+ parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17446
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
17447
+ pm_location_t location;
17448
+
17449
+ if (callback_data->shared) {
17450
+ location = (pm_location_t) { .start = start, .end = end };
17451
+ } else {
17452
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17453
+ }
17454
+
17455
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17456
+ }
17457
+
17458
+ /**
17459
+ * Parse the errors for the regular expression and add them to the parser.
17460
+ */
17461
+ static void
17462
+ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17463
+ const pm_string_t *unescaped = &node->unescaped;
17464
+ parse_regular_expression_error_data_t error_data = {
17465
+ .parser = parser,
17466
+ .start = node->base.location.start,
17467
+ .end = node->base.location.end,
17468
+ .shared = unescaped->type == PM_STRING_SHARED
17469
+ };
17470
+
17471
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
17472
+ }
17473
+
17093
17474
  /**
17094
17475
  * Parse an expression that begins with the previous node that we just lexed.
17095
17476
  */
@@ -17110,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17110
17491
  break;
17111
17492
  }
17112
17493
 
17113
- if (pm_array_node_size(array) != 0) {
17114
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
17494
+ // Ensure that we have a comma between elements in the array.
17495
+ if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17496
+ const uint8_t *location = parser->previous.end;
17497
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17498
+
17499
+ parser->previous.start = location;
17500
+ parser->previous.type = PM_TOKEN_MISSING;
17115
17501
  }
17116
17502
 
17117
17503
  // If we have a right bracket immediately following a comma,
@@ -17289,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17289
17675
 
17290
17676
  // If we didn't find a terminator and we didn't find a right
17291
17677
  // parenthesis, then this is a syntax error.
17292
- if (!terminator_found) {
17678
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17293
17679
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17294
17680
  }
17295
17681
 
@@ -17318,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17318
17704
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17319
17705
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17320
17706
  break;
17321
- } else {
17707
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
17708
+ // If we're at the end of the file, then we're going to add
17709
+ // an error after this for the ) anyway.
17322
17710
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17323
17711
  }
17324
17712
  }
@@ -17537,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17537
17925
  ) {
17538
17926
  pm_arguments_t arguments = { 0 };
17539
17927
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
17540
-
17541
17928
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17929
+
17930
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17931
+ // If we're about to convert an 'it' implicit local
17932
+ // variable read into a method call, we need to remove
17933
+ // it from the list of implicit local variables.
17934
+ parse_target_implicit_parameter(parser, node);
17935
+ } else {
17936
+ // Otherwise, we're about to convert a regular local
17937
+ // variable read into a method call, in which case we
17938
+ // need to indicate that this was not a read for the
17939
+ // purposes of warnings.
17940
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17941
+
17942
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17943
+ parse_target_implicit_parameter(parser, node);
17944
+ } else {
17945
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
17946
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17947
+ }
17948
+ }
17949
+
17542
17950
  pm_node_destroy(parser, node);
17543
17951
  return (pm_node_t *) fcall;
17544
17952
  }
@@ -17546,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17546
17954
 
17547
17955
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17548
17956
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
17549
- } else {
17550
- // Check if `it` is not going to be assigned.
17551
- switch (parser->current.type) {
17552
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
17553
- case PM_TOKEN_AMPERSAND_EQUAL:
17554
- case PM_TOKEN_CARET_EQUAL:
17555
- case PM_TOKEN_EQUAL:
17556
- case PM_TOKEN_GREATER_GREATER_EQUAL:
17557
- case PM_TOKEN_LESS_LESS_EQUAL:
17558
- case PM_TOKEN_MINUS_EQUAL:
17559
- case PM_TOKEN_PARENTHESIS_RIGHT:
17560
- case PM_TOKEN_PERCENT_EQUAL:
17561
- case PM_TOKEN_PIPE_EQUAL:
17562
- case PM_TOKEN_PIPE_PIPE_EQUAL:
17563
- case PM_TOKEN_PLUS_EQUAL:
17564
- case PM_TOKEN_SLASH_EQUAL:
17565
- case PM_TOKEN_STAR_EQUAL:
17566
- case PM_TOKEN_STAR_STAR_EQUAL:
17567
- break;
17568
- default:
17569
- // Once we know it's neither a method call nor an
17570
- // assignment, we can finally create `it` default
17571
- // parameter.
17572
- node = pm_node_check_it(parser, node);
17573
- }
17574
17957
  }
17575
17958
 
17576
17959
  return node;
@@ -17831,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17831
18214
  // as frozen because when clause strings are frozen.
17832
18215
  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17833
18216
  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18217
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18218
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17834
18219
  }
17835
18220
 
17836
18221
  pm_when_clause_static_literals_add(parser, &literals, condition);
@@ -17887,7 +18272,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17887
18272
  pm_token_t in_keyword = parser->previous;
17888
18273
 
17889
18274
  pm_constant_id_list_t captures = { 0 };
17890
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18275
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17891
18276
 
17892
18277
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17893
18278
  pm_constant_id_list_free(&captures);
@@ -17916,7 +18301,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17916
18301
  then_keyword = not_provided(parser);
17917
18302
  }
17918
18303
  } else {
17919
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18304
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17920
18305
  then_keyword = parser->previous;
17921
18306
  }
17922
18307
 
@@ -18236,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18236
18621
 
18237
18622
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18238
18623
  receiver = parse_variable_call(parser);
18239
- receiver = pm_node_check_it(parser, receiver);
18240
18624
 
18241
18625
  pm_parser_scope_push(parser, true);
18242
18626
  lex_state_set(parser, PM_LEX_STATE_FNAME);
@@ -18370,7 +18754,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18370
18754
  lex_state_set(parser, PM_LEX_STATE_BEG);
18371
18755
  parser->command_start = true;
18372
18756
 
18373
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18757
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18758
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18759
+ parser->previous.start = parser->previous.end;
18760
+ parser->previous.type = PM_TOKEN_MISSING;
18761
+ }
18762
+
18374
18763
  rparen = parser->previous;
18375
18764
  break;
18376
18765
  }
@@ -18568,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18568
18957
  if (match1(parser, PM_TOKEN_COMMA)) {
18569
18958
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18570
18959
  } else {
18571
- index = parse_target(parser, index, false);
18960
+ index = parse_target(parser, index, false, false);
18572
18961
  }
18573
18962
 
18574
18963
  context_pop(parser);
@@ -19203,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19203
19592
  bool ascii_only = parser->current_regular_expression_ascii_only;
19204
19593
  parser_lex(parser);
19205
19594
 
19206
- // If we hit an end, then we can create a regular expression node
19207
- // without interpolation, which can be represented more succinctly and
19208
- // more easily compiled.
19595
+ // If we hit an end, then we can create a regular expression
19596
+ // node without interpolation, which can be represented more
19597
+ // succinctly and more easily compiled.
19209
19598
  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19210
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19211
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
19212
- return node;
19599
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19600
+
19601
+ // If we're not immediately followed by a =~, then we want
19602
+ // to parse all of the errors at this point. If it is
19603
+ // followed by a =~, then it will get parsed higher up while
19604
+ // parsing the named captures as well.
19605
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19606
+ parse_regular_expression_errors(parser, node);
19607
+ }
19608
+
19609
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
19610
+ return (pm_node_t *) node;
19213
19611
  }
19214
19612
 
19215
19613
  // If we get here, then we have interpolation so we'll need to create
@@ -19219,6 +19617,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19219
19617
  pm_token_t opening = not_provided(parser);
19220
19618
  pm_token_t closing = not_provided(parser);
19221
19619
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19620
+
19621
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19622
+ // This is extremely strange, but the first string part of a
19623
+ // regular expression will always be tagged as binary if we
19624
+ // are in a US-ASCII file, no matter its contents.
19625
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19626
+ }
19627
+
19222
19628
  pm_interpolated_regular_expression_node_append(interpolated, part);
19223
19629
  } else {
19224
19630
  // If the first part of the body of the regular expression is not a
@@ -19419,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19419
19825
 
19420
19826
  switch (parser->current.type) {
19421
19827
  case PM_TOKEN_PARENTHESIS_LEFT: {
19422
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19423
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19424
-
19425
19828
  pm_token_t opening = parser->current;
19426
19829
  parser_lex(parser);
19427
19830
 
@@ -19438,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19438
19841
  break;
19439
19842
  }
19440
19843
  case PM_CASE_PARAMETER: {
19441
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19442
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19443
-
19444
19844
  pm_accepts_block_stack_push(parser, false);
19445
19845
  pm_token_t opening = not_provided(parser);
19446
19846
  block_parameters = parse_block_parameters(parser, false, &opening, true);
@@ -19693,122 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19693
20093
  }
19694
20094
 
19695
20095
  /**
19696
- * Returns true if the name of the capture group is a valid local variable that
19697
- * can be written to.
20096
+ * This struct is used to pass information between the regular expression parser
20097
+ * and the named capture callback.
19698
20098
  */
19699
- static bool
19700
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19701
- if (length == 0) {
19702
- return false;
19703
- }
20099
+ typedef struct {
20100
+ /** The parser that is parsing the regular expression. */
20101
+ pm_parser_t *parser;
19704
20102
 
19705
- // First ensure that it starts with a valid identifier starting character.
19706
- size_t width = char_is_identifier_start(parser, source);
19707
- if (!width) {
19708
- return false;
19709
- }
20103
+ /** The call node wrapping the regular expression node. */
20104
+ pm_call_node_t *call;
19710
20105
 
19711
- // Next, ensure that it's not an uppercase character.
19712
- if (parser->encoding_changed) {
19713
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19714
- } else {
19715
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19716
- }
20106
+ /** The match write node that is being created. */
20107
+ pm_match_write_node_t *match;
19717
20108
 
19718
- // Next, iterate through all of the bytes of the string to ensure that they
19719
- // are all valid identifier characters.
19720
- const uint8_t *cursor = source + width;
19721
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19722
- cursor += width;
19723
- }
20109
+ /** The list of names that have been parsed. */
20110
+ pm_constant_id_list_t names;
19724
20111
 
19725
- return cursor == source + length;
19726
- }
20112
+ /**
20113
+ * Whether the content of the regular expression is shared. This impacts
20114
+ * whether or not we used owned constants or shared constants in the
20115
+ * constant pool for the names of the captures.
20116
+ */
20117
+ bool shared;
20118
+ } parse_regular_expression_named_capture_data_t;
19727
20119
 
19728
20120
  /**
19729
- * Potentially change a =~ with a regular expression with named captures into a
19730
- * match write node.
20121
+ * This callback is called when the regular expression parser encounters a named
20122
+ * capture group.
19731
20123
  */
19732
- static pm_node_t *
19733
- parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
19734
- pm_string_list_t named_captures = { 0 };
19735
- pm_node_t *result;
20124
+ static void
20125
+ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20126
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
19736
20127
 
19737
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
19738
- // Since we should not create a MatchWriteNode when all capture names
19739
- // are invalid, creating a MatchWriteNode is delaid here.
19740
- pm_match_write_node_t *match = NULL;
19741
- pm_constant_id_list_t names = { 0 };
20128
+ pm_parser_t *parser = callback_data->parser;
20129
+ pm_call_node_t *call = callback_data->call;
20130
+ pm_constant_id_list_t *names = &callback_data->names;
19742
20131
 
19743
- for (size_t index = 0; index < named_captures.length; index++) {
19744
- pm_string_t *string = &named_captures.strings[index];
20132
+ const uint8_t *source = pm_string_source(capture);
20133
+ size_t length = pm_string_length(capture);
19745
20134
 
19746
- const uint8_t *source = pm_string_source(string);
19747
- size_t length = pm_string_length(string);
20135
+ pm_location_t location;
20136
+ pm_constant_id_t name;
19748
20137
 
19749
- pm_location_t location;
19750
- pm_constant_id_t name;
20138
+ // If the name of the capture group isn't a valid identifier, we do
20139
+ // not add it to the local table.
20140
+ if (!pm_slice_is_valid_local(parser, source, source + length)) return;
19751
20141
 
19752
- // If the name of the capture group isn't a valid identifier, we do
19753
- // not add it to the local table.
19754
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
20142
+ if (callback_data->shared) {
20143
+ // If the unescaped string is a slice of the source, then we can
20144
+ // copy the names directly. The pointers will line up.
20145
+ location = (pm_location_t) { .start = source, .end = source + length };
20146
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
20147
+ } else {
20148
+ // Otherwise, the name is a slice of the malloc-ed owned string,
20149
+ // in which case we need to copy it out into a new string.
20150
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
19755
20151
 
19756
- if (content->type == PM_STRING_SHARED) {
19757
- // If the unescaped string is a slice of the source, then we can
19758
- // copy the names directly. The pointers will line up.
19759
- location = (pm_location_t) { .start = source, .end = source + length };
19760
- name = pm_parser_constant_id_location(parser, location.start, location.end);
19761
- } else {
19762
- // Otherwise, the name is a slice of the malloc-ed owned string,
19763
- // in which case we need to copy it out into a new string.
19764
- location = call->receiver->location;
20152
+ void *memory = xmalloc(length);
20153
+ if (memory == NULL) abort();
19765
20154
 
19766
- void *memory = xmalloc(length);
19767
- if (memory == NULL) abort();
20155
+ memcpy(memory, source, length);
20156
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20157
+ }
19768
20158
 
19769
- memcpy(memory, source, length);
19770
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
19771
- }
20159
+ // Add this name to the list of constants if it is valid, not duplicated,
20160
+ // and not a keyword.
20161
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20162
+ pm_constant_id_list_append(names, name);
19772
20163
 
19773
- if (name != 0) {
19774
- // We dont want to create duplicate targets if the capture name
19775
- // is duplicated.
19776
- if (pm_constant_id_list_includes(&names, name)) continue;
19777
- pm_constant_id_list_append(&names, name);
20164
+ int depth;
20165
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20166
+ // If the local is not already a local but it is a keyword, then we
20167
+ // do not want to add a capture for this.
20168
+ if (pm_local_is_keyword((const char *) source, length)) return;
19778
20169
 
19779
- int depth;
19780
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
19781
- // If the identifier is not already a local, then we'll add
19782
- // it to the local table unless it's a keyword.
19783
- if (pm_local_is_keyword((const char *) source, length)) continue;
20170
+ // If the identifier is not already a local, then we will add it to
20171
+ // the local table.
20172
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
20173
+ }
19784
20174
 
19785
- pm_parser_local_add(parser, name, location.start, location.end, 0);
19786
- }
20175
+ // Here we lazily create the MatchWriteNode since we know we're
20176
+ // about to add a target.
20177
+ if (callback_data->match == NULL) {
20178
+ callback_data->match = pm_match_write_node_create(parser, call);
20179
+ }
19787
20180
 
19788
- // Here we lazily create the MatchWriteNode since we know we're
19789
- // about to add a target.
19790
- if (match == NULL) match = pm_match_write_node_create(parser, call);
20181
+ // Next, create the local variable target and add it to the list of
20182
+ // targets for the match.
20183
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20184
+ pm_node_list_append(&callback_data->match->targets, target);
20185
+ }
20186
+ }
19791
20187
 
19792
- // Next, create the local variable target and add it to the
19793
- // list of targets for the match.
19794
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
19795
- pm_node_list_append(&match->targets, target);
19796
- }
19797
- }
20188
+ /**
20189
+ * Potentially change a =~ with a regular expression with named captures into a
20190
+ * match write node.
20191
+ */
20192
+ static pm_node_t *
20193
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
20194
+ parse_regular_expression_named_capture_data_t callback_data = {
20195
+ .parser = parser,
20196
+ .call = call,
20197
+ .names = { 0 },
20198
+ .shared = content->type == PM_STRING_SHARED
20199
+ };
19798
20200
 
19799
- if (match != NULL) {
19800
- result = (pm_node_t *) match;
19801
- } else {
19802
- result = (pm_node_t *) call;
19803
- }
20201
+ parse_regular_expression_error_data_t error_data = {
20202
+ .parser = parser,
20203
+ .start = call->receiver->location.start,
20204
+ .end = call->receiver->location.end,
20205
+ .shared = content->type == PM_STRING_SHARED
20206
+ };
20207
+
20208
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20209
+ pm_constant_id_list_free(&callback_data.names);
19804
20210
 
19805
- pm_constant_id_list_free(&names);
20211
+ if (callback_data.match != NULL) {
20212
+ return (pm_node_t *) callback_data.match;
19806
20213
  } else {
19807
- result = (pm_node_t *) call;
20214
+ return (pm_node_t *) call;
19808
20215
  }
19809
-
19810
- pm_string_list_free(&named_captures);
19811
- return result;
19812
20216
  }
19813
20217
 
19814
20218
  static inline pm_node_t *
@@ -19925,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19925
20329
  return result;
19926
20330
  }
19927
20331
  case PM_CALL_NODE: {
19928
- parser_lex(parser);
19929
20332
  pm_call_node_t *cast = (pm_call_node_t *) node;
19930
20333
 
19931
20334
  // If we have a vcall (a method with no arguments and no
@@ -19936,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19936
20339
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
19937
20340
 
19938
20341
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20342
+ parser_lex(parser);
20343
+
19939
20344
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
19940
20345
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
19941
20346
 
@@ -19943,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19943
20348
  return result;
19944
20349
  }
19945
20350
 
20351
+ // Move past the token here so that we have already added
20352
+ // the local variable by this point.
20353
+ parser_lex(parser);
20354
+
19946
20355
  // If there is no call operator and the message is "[]" then
19947
20356
  // this is an aref expression, and we can transform it into
19948
20357
  // an aset expression.
@@ -20038,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20038
20447
  return result;
20039
20448
  }
20040
20449
  case PM_CALL_NODE: {
20041
- parser_lex(parser);
20042
20450
  pm_call_node_t *cast = (pm_call_node_t *) node;
20043
20451
 
20044
20452
  // If we have a vcall (a method with no arguments and no
@@ -20049,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20049
20457
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20050
20458
 
20051
20459
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20460
+ parser_lex(parser);
20461
+
20052
20462
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20053
20463
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20054
20464
 
@@ -20056,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20056
20466
  return result;
20057
20467
  }
20058
20468
 
20469
+ // Move past the token here so that we have already added
20470
+ // the local variable by this point.
20471
+ parser_lex(parser);
20472
+
20059
20473
  // If there is no call operator and the message is "[]" then
20060
20474
  // this is an aref expression, and we can transform it into
20061
20475
  // an aset expression.
@@ -20209,7 +20623,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20209
20623
  // In this case we have an operator but we don't know what it's for.
20210
20624
  // We need to treat it as an error. For now, we'll mark it as an error
20211
20625
  // and just skip right past it.
20212
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20626
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
20213
20627
  return node;
20214
20628
  }
20215
20629
  }
@@ -20465,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20465
20879
 
20466
20880
  if (
20467
20881
  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
20468
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
20882
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
20469
20883
  ) {
20470
20884
  // If we have a constant immediately following a '::' operator, then
20471
20885
  // this can either be a constant path or a method call, depending on
@@ -20591,7 +21005,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20591
21005
  parser_lex(parser);
20592
21006
 
20593
21007
  pm_constant_id_list_t captures = { 0 };
20594
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
21008
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20595
21009
 
20596
21010
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20597
21011
  pm_constant_id_list_free(&captures);
@@ -20608,7 +21022,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20608
21022
  parser_lex(parser);
20609
21023
 
20610
21024
  pm_constant_id_list_t captures = { 0 };
20611
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
21025
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20612
21026
 
20613
21027
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20614
21028
  pm_constant_id_list_free(&captures);
@@ -20621,6 +21035,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20621
21035
  }
20622
21036
  }
20623
21037
 
21038
+ #undef PM_PARSE_PATTERN_SINGLE
21039
+ #undef PM_PARSE_PATTERN_TOP
21040
+ #undef PM_PARSE_PATTERN_MULTI
21041
+
20624
21042
  /**
20625
21043
  * Parse an expression at the given point of the parser using the given binding
20626
21044
  * power to parse subsequent chains. If this function finds a syntax error, it
@@ -21004,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
21004
21422
 
21005
21423
  // Scopes given from the outside are not allowed to have numbered
21006
21424
  // parameters.
21007
- parser->current_scope->numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED;
21425
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
21008
21426
 
21009
21427
  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
21010
21428
  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -21392,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
21392
21810
  }
21393
21811
 
21394
21812
  #endif
21395
-
21396
- /** An error that is going to be formatted into the output. */
21397
- typedef struct {
21398
- /** A pointer to the diagnostic that was generated during parsing. */
21399
- pm_diagnostic_t *error;
21400
-
21401
- /** The start line of the diagnostic message. */
21402
- int32_t line;
21403
-
21404
- /** The column start of the diagnostic message. */
21405
- uint32_t column_start;
21406
-
21407
- /** The column end of the diagnostic message. */
21408
- uint32_t column_end;
21409
- } pm_error_t;
21410
-
21411
- /** The format that will be used to format the errors into the output. */
21412
- typedef struct {
21413
- /** The prefix that will be used for line numbers. */
21414
- const char *number_prefix;
21415
-
21416
- /** The prefix that will be used for blank lines. */
21417
- const char *blank_prefix;
21418
-
21419
- /** The divider that will be used between sections of source code. */
21420
- const char *divider;
21421
-
21422
- /** The length of the blank prefix. */
21423
- size_t blank_prefix_length;
21424
-
21425
- /** The length of the divider. */
21426
- size_t divider_length;
21427
- } pm_error_format_t;
21428
-
21429
- #define PM_COLOR_GRAY "\033[38;5;102m"
21430
- #define PM_COLOR_RED "\033[1;31m"
21431
- #define PM_COLOR_RESET "\033[m"
21432
-
21433
- static inline pm_error_t *
21434
- pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
21435
- pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
21436
- if (errors == NULL) return NULL;
21437
-
21438
- int32_t start_line = parser->start_line;
21439
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
21440
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
21441
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
21442
-
21443
- // We're going to insert this error into the array in sorted order. We
21444
- // do this by finding the first error that has a line number greater
21445
- // than the current error and then inserting the current error before
21446
- // that one.
21447
- size_t index = 0;
21448
- while (
21449
- (index < error_list->size) &&
21450
- (errors[index].error != NULL) &&
21451
- (
21452
- (errors[index].line < start.line) ||
21453
- ((errors[index].line == start.line) && (errors[index].column_start < start.column))
21454
- )
21455
- ) index++;
21456
-
21457
- // Now we're going to shift all of the errors after this one down one
21458
- // index to make room for the new error.
21459
- if (index + 1 < error_list->size) {
21460
- memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
21461
- }
21462
-
21463
- // Finally, we'll insert the error into the array.
21464
- uint32_t column_end;
21465
- if (start.line == end.line) {
21466
- column_end = end.column;
21467
- } else {
21468
- column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
21469
- }
21470
-
21471
- // Ensure we have at least one column of error.
21472
- if (start.column == column_end) column_end++;
21473
-
21474
- errors[index] = (pm_error_t) {
21475
- .error = error,
21476
- .line = start.line,
21477
- .column_start = start.column,
21478
- .column_end = column_end
21479
- };
21480
- }
21481
-
21482
- return errors;
21483
- }
21484
-
21485
- static inline void
21486
- pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
21487
- int32_t line_delta = line - parser->start_line;
21488
- assert(line_delta >= 0);
21489
-
21490
- size_t index = (size_t) line_delta;
21491
- assert(index < newline_list->size);
21492
-
21493
- const uint8_t *start = &parser->start[newline_list->offsets[index]];
21494
- const uint8_t *end;
21495
-
21496
- if (index >= newline_list->size - 1) {
21497
- end = parser->end;
21498
- } else {
21499
- end = &parser->start[newline_list->offsets[index + 1]];
21500
- }
21501
-
21502
- pm_buffer_append_format(buffer, number_prefix, line);
21503
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
21504
-
21505
- if (end == parser->end && end[-1] != '\n') {
21506
- pm_buffer_append_string(buffer, "\n", 1);
21507
- }
21508
- }
21509
-
21510
- /**
21511
- * Format the errors on the parser into the given buffer.
21512
- */
21513
- PRISM_EXPORTED_FUNCTION void
21514
- pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
21515
- assert(error_list->size != 0);
21516
-
21517
- // First, we're going to sort all of the errors by line number using an
21518
- // insertion sort into a newly allocated array.
21519
- const int32_t start_line = parser->start_line;
21520
- const pm_newline_list_t *newline_list = &parser->newline_list;
21521
-
21522
- pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
21523
- if (errors == NULL) return;
21524
-
21525
- // Now we're going to determine how we're going to format line numbers and
21526
- // blank lines based on the maximum number of digits in the line numbers
21527
- // that are going to be displaid.
21528
- pm_error_format_t error_format;
21529
- int32_t first_line_number = errors[0].line;
21530
- int32_t last_line_number = errors[error_list->size - 1].line;
21531
-
21532
- // If we have a maximum line number that is negative, then we're going to
21533
- // use the absolute value for comparison but multiple by 10 to additionally
21534
- // have a column for the negative sign.
21535
- if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
21536
- if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
21537
- int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
21538
-
21539
- if (max_line_number < 10) {
21540
- if (colorize) {
21541
- error_format = (pm_error_format_t) {
21542
- .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
21543
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21544
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
21545
- };
21546
- } else {
21547
- error_format = (pm_error_format_t) {
21548
- .number_prefix = "%1" PRIi32 " | ",
21549
- .blank_prefix = " | ",
21550
- .divider = " ~~~~~\n"
21551
- };
21552
- }
21553
- } else if (max_line_number < 100) {
21554
- if (colorize) {
21555
- error_format = (pm_error_format_t) {
21556
- .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
21557
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21558
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
21559
- };
21560
- } else {
21561
- error_format = (pm_error_format_t) {
21562
- .number_prefix = "%2" PRIi32 " | ",
21563
- .blank_prefix = " | ",
21564
- .divider = " ~~~~~~\n"
21565
- };
21566
- }
21567
- } else if (max_line_number < 1000) {
21568
- if (colorize) {
21569
- error_format = (pm_error_format_t) {
21570
- .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
21571
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21572
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
21573
- };
21574
- } else {
21575
- error_format = (pm_error_format_t) {
21576
- .number_prefix = "%3" PRIi32 " | ",
21577
- .blank_prefix = " | ",
21578
- .divider = " ~~~~~~~\n"
21579
- };
21580
- }
21581
- } else if (max_line_number < 10000) {
21582
- if (colorize) {
21583
- error_format = (pm_error_format_t) {
21584
- .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
21585
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21586
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21587
- };
21588
- } else {
21589
- error_format = (pm_error_format_t) {
21590
- .number_prefix = "%4" PRIi32 " | ",
21591
- .blank_prefix = " | ",
21592
- .divider = " ~~~~~~~~\n"
21593
- };
21594
- }
21595
- } else {
21596
- if (colorize) {
21597
- error_format = (pm_error_format_t) {
21598
- .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
21599
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21600
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21601
- };
21602
- } else {
21603
- error_format = (pm_error_format_t) {
21604
- .number_prefix = "%5" PRIi32 " | ",
21605
- .blank_prefix = " | ",
21606
- .divider = " ~~~~~~~~\n"
21607
- };
21608
- }
21609
- }
21610
-
21611
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
21612
- error_format.divider_length = strlen(error_format.divider);
21613
-
21614
- // Now we're going to iterate through every error in our error list and
21615
- // display it. While we're iterating, we will display some padding lines of
21616
- // the source before the error to give some context. We'll be careful not to
21617
- // display the same line twice in case the errors are close enough in the
21618
- // source.
21619
- int32_t last_line = parser->start_line - 1;
21620
- const pm_encoding_t *encoding = parser->encoding;
21621
-
21622
- for (size_t index = 0; index < error_list->size; index++) {
21623
- pm_error_t *error = &errors[index];
21624
-
21625
- // Here we determine how many lines of padding of the source to display,
21626
- // based on the difference from the last line that was displaid.
21627
- if (error->line - last_line > 1) {
21628
- if (error->line - last_line > 2) {
21629
- if ((index != 0) && (error->line - last_line > 3)) {
21630
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
21631
- }
21632
-
21633
- pm_buffer_append_string(buffer, " ", 2);
21634
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
21635
- }
21636
-
21637
- pm_buffer_append_string(buffer, " ", 2);
21638
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
21639
- }
21640
-
21641
- // If this is the first error or we're on a new line, then we'll display
21642
- // the line that has the error in it.
21643
- if ((index == 0) || (error->line != last_line)) {
21644
- if (colorize) {
21645
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
21646
- } else {
21647
- pm_buffer_append_string(buffer, "> ", 2);
21648
- }
21649
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
21650
- }
21651
-
21652
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
21653
- if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
21654
-
21655
- // Now we'll display the actual error message. We'll do this by first
21656
- // putting the prefix to the line, then a bunch of blank spaces
21657
- // depending on the column, then as many carets as we need to display
21658
- // the width of the error, then the error message itself.
21659
- //
21660
- // Note that this doesn't take into account the width of the actual
21661
- // character when displaid in the terminal. For some east-asian
21662
- // languages or emoji, this means it can be thrown off pretty badly. We
21663
- // will need to solve this eventually.
21664
- pm_buffer_append_string(buffer, " ", 2);
21665
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21666
-
21667
- size_t column = 0;
21668
- while (column < error->column_start) {
21669
- pm_buffer_append_byte(buffer, ' ');
21670
-
21671
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21672
- column += (char_width == 0 ? 1 : char_width);
21673
- }
21674
-
21675
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21676
- pm_buffer_append_byte(buffer, '^');
21677
-
21678
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21679
- column += (char_width == 0 ? 1 : char_width);
21680
-
21681
- while (column < error->column_end) {
21682
- pm_buffer_append_byte(buffer, '~');
21683
-
21684
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21685
- column += (char_width == 0 ? 1 : char_width);
21686
- }
21687
-
21688
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21689
-
21690
- if (inline_messages) {
21691
- pm_buffer_append_byte(buffer, ' ');
21692
- assert(error->error != NULL);
21693
-
21694
- const char *message = error->error->message;
21695
- pm_buffer_append_string(buffer, message, strlen(message));
21696
- }
21697
-
21698
- pm_buffer_append_byte(buffer, '\n');
21699
-
21700
- // Here we determine how many lines of padding to display after the
21701
- // error, depending on where the next error is in source.
21702
- last_line = error->line;
21703
- int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
21704
-
21705
- if (next_line - last_line > 1) {
21706
- pm_buffer_append_string(buffer, " ", 2);
21707
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21708
- }
21709
-
21710
- if (next_line - last_line > 1) {
21711
- pm_buffer_append_string(buffer, " ", 2);
21712
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21713
- }
21714
- }
21715
-
21716
- // Finally, we'll free the array of errors that we allocated.
21717
- xfree(errors);
21718
- }
21719
-
21720
- #undef PM_COLOR_GRAY
21721
- #undef PM_COLOR_RED
21722
- #undef PM_COLOR_RESET