prism 0.29.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +66 -9
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +30 -12
  9. data/ext/prism/extension.c +107 -372
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +138 -70
  12. data/include/prism/diagnostic.h +7 -2
  13. data/include/prism/node.h +0 -21
  14. data/include/prism/parser.h +23 -25
  15. data/include/prism/regexp.h +17 -8
  16. data/include/prism/static_literals.h +3 -2
  17. data/include/prism/util/pm_char.h +1 -2
  18. data/include/prism/util/pm_constant_pool.h +0 -8
  19. data/include/prism/util/pm_integer.h +16 -9
  20. data/include/prism/util/pm_string.h +0 -8
  21. data/include/prism/version.h +2 -2
  22. data/include/prism.h +0 -11
  23. data/lib/prism/compiler.rb +3 -0
  24. data/lib/prism/dispatcher.rb +14 -0
  25. data/lib/prism/dot_visitor.rb +22 -3
  26. data/lib/prism/dsl.rb +7 -2
  27. data/lib/prism/ffi.rb +24 -3
  28. data/lib/prism/inspect_visitor.rb +10 -8
  29. data/lib/prism/mutation_compiler.rb +6 -1
  30. data/lib/prism/node.rb +166 -241
  31. data/lib/prism/node_ext.rb +21 -5
  32. data/lib/prism/parse_result/comments.rb +0 -7
  33. data/lib/prism/parse_result/newlines.rb +101 -11
  34. data/lib/prism/parse_result.rb +17 -0
  35. data/lib/prism/reflection.rb +3 -1
  36. data/lib/prism/serialize.rb +80 -67
  37. data/lib/prism/translation/parser/compiler.rb +134 -114
  38. data/lib/prism/translation/parser.rb +6 -1
  39. data/lib/prism/translation/ripper.rb +8 -6
  40. data/lib/prism/translation/ruby_parser.rb +23 -5
  41. data/lib/prism/visitor.rb +3 -0
  42. data/lib/prism.rb +0 -4
  43. data/prism.gemspec +1 -4
  44. data/rbi/prism/node.rbi +63 -6
  45. data/rbi/prism/visitor.rbi +3 -0
  46. data/rbi/prism.rbi +6 -0
  47. data/sig/prism/dsl.rbs +4 -1
  48. data/sig/prism/mutation_compiler.rbs +1 -0
  49. data/sig/prism/node.rbs +28 -4
  50. data/sig/prism/visitor.rbs +1 -0
  51. data/sig/prism.rbs +21 -0
  52. data/src/diagnostic.c +27 -17
  53. data/src/node.c +408 -1666
  54. data/src/prettyprint.c +49 -6
  55. data/src/prism.c +958 -991
  56. data/src/regexp.c +133 -68
  57. data/src/serialize.c +6 -1
  58. data/src/static_literals.c +63 -84
  59. data/src/token_type.c +2 -2
  60. data/src/util/pm_constant_pool.c +0 -8
  61. data/src/util/pm_integer.c +39 -11
  62. data/src/util/pm_string.c +0 -12
  63. data/src/util/pm_strpbrk.c +32 -6
  64. metadata +2 -5
  65. data/include/prism/util/pm_string_list.h +0 -44
  66. data/lib/prism/debug.rb +0 -249
  67. data/src/util/pm_string_list.c +0 -28
data/src/prism.c CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
423
423
  * This is the equivalent of IS_lex_state in CRuby.
424
424
  */
425
425
  static inline bool
426
- lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
426
+ lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
427
427
  return parser->lex_state & state;
428
428
  }
429
429
 
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
708
708
  .previous = parser->current_scope,
709
709
  .locals = { 0 },
710
710
  .parameters = PM_SCOPE_PARAMETERS_NONE,
711
- .numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_NONE,
711
+ .implicit_parameters = { 0 },
712
712
  .shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
713
713
  .closed = closed
714
714
  };
@@ -1183,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
1183
1183
  return NULL;
1184
1184
  case PM_BEGIN_NODE: {
1185
1185
  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
+
1187
+ if (cast->statements == NULL && cast->ensure_clause != NULL) {
1188
+ node = (pm_node_t *) cast->ensure_clause;
1189
+ }
1190
+ else {
1191
+ if (cast->rescue_clause != NULL) {
1192
+ if (cast->rescue_clause->statements == NULL) {
1193
+ return NULL;
1194
+ }
1195
+ else if (cast->else_clause != NULL) {
1196
+ node = (pm_node_t *) cast->else_clause;
1197
+ }
1198
+ else {
1199
+ node = (pm_node_t *) cast->statements;
1200
+ }
1201
+ }
1202
+ else {
1203
+ node = (pm_node_t *) cast->statements;
1204
+ }
1205
+ }
1206
+
1207
+ break;
1208
+ }
1209
+ case PM_ENSURE_NODE: {
1210
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1186
1211
  node = (pm_node_t *) cast->statements;
1187
1212
  break;
1188
1213
  }
@@ -1630,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
1630
1655
  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1631
1656
  }
1632
1657
 
1633
- #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
1658
+ #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
1634
1659
  #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
1635
1660
  #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
1636
1661
  #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
@@ -2827,8 +2852,7 @@ static pm_call_node_t *
2827
2852
  pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2828
2853
  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2829
2854
 
2830
- node->base.location.start = parser->start;
2831
- node->base.location.end = parser->start;
2855
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
2832
2856
  node->arguments = arguments;
2833
2857
 
2834
2858
  node->name = name;
@@ -4291,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4291
4315
  }
4292
4316
 
4293
4317
  /**
4294
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
4318
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
4295
4319
  */
4296
4320
  static pm_rational_node_t *
4297
4321
  pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4301,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4301
4325
  *node = (pm_rational_node_t) {
4302
4326
  {
4303
4327
  .type = PM_RATIONAL_NODE,
4304
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
4328
+ .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4305
4329
  .location = PM_LOCATION_TOKEN_VALUE(token)
4306
4330
  },
4307
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4308
- .type = PM_TOKEN_FLOAT,
4309
- .start = token->start,
4310
- .end = token->end - 1
4311
- }))
4331
+ .numerator = { 0 },
4332
+ .denominator = { 0 }
4312
4333
  };
4313
4334
 
4335
+ const uint8_t *start = token->start;
4336
+ const uint8_t *end = token->end - 1; // r
4337
+
4338
+ while (start < end && *start == '0') start++; // 0.1 -> .1
4339
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4340
+
4341
+ size_t length = (size_t) (end - start);
4342
+ if (length == 1) {
4343
+ node->denominator.value = 1;
4344
+ return node;
4345
+ }
4346
+
4347
+ const uint8_t *point = memchr(start, '.', length);
4348
+ assert(point && "should have a decimal point");
4349
+
4350
+ uint8_t *digits = malloc(length);
4351
+ if (digits == NULL) {
4352
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4353
+ abort();
4354
+ }
4355
+
4356
+ memcpy(digits, start, (unsigned long) (point - start));
4357
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4358
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4359
+
4360
+ digits[0] = '1';
4361
+ if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4362
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4363
+ free(digits);
4364
+
4365
+ pm_integers_reduce(&node->numerator, &node->denominator);
4314
4366
  return node;
4315
4367
  }
4316
4368
 
@@ -4621,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
4621
4673
  *node = (pm_global_variable_read_node_t) {
4622
4674
  {
4623
4675
  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4624
- .location = { .start = parser->start, .end = parser->start }
4676
+ .location = PM_LOCATION_NULL_VALUE(parser)
4625
4677
  },
4626
4678
  .name = name
4627
4679
  };
@@ -4663,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
4663
4715
  *node = (pm_global_variable_write_node_t) {
4664
4716
  {
4665
4717
  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4666
- .location = { .start = parser->start, .end = parser->start }
4718
+ .location = PM_LOCATION_NULL_VALUE(parser)
4667
4719
  },
4668
4720
  .name = name,
4669
- .name_loc = { .start = parser->start, .end = parser->start },
4670
- .operator_loc = { .start = parser->start, .end = parser->start },
4721
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
4722
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4671
4723
  .value = value
4672
4724
  };
4673
4725
 
@@ -4944,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
4944
4996
  }
4945
4997
 
4946
4998
  /**
4947
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
4999
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
4948
5000
  * token.
4949
5001
  */
4950
5002
  static pm_rational_node_t *
@@ -4955,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
4955
5007
  *node = (pm_rational_node_t) {
4956
5008
  {
4957
5009
  .type = PM_RATIONAL_NODE,
4958
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5010
+ .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4959
5011
  .location = PM_LOCATION_TOKEN_VALUE(token)
4960
5012
  },
4961
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4962
- .type = PM_TOKEN_INTEGER,
4963
- .start = token->start,
4964
- .end = token->end - 1
4965
- }))
5013
+ .numerator = { 0 },
5014
+ .denominator = { .value = 1, 0 }
4966
5015
  };
4967
5016
 
5017
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
5018
+ switch (base) {
5019
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
5020
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
5021
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
5022
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
5023
+ default: assert(false && "unreachable"); break;
5024
+ }
5025
+
5026
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
5027
+
4968
5028
  return node;
4969
5029
  }
4970
5030
 
@@ -5462,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
5462
5522
  node->base.location.end = closing->end;
5463
5523
  }
5464
5524
 
5525
+ /**
5526
+ * Create a local variable read that is reading the implicit 'it' variable.
5527
+ */
5528
+ static pm_it_local_variable_read_node_t *
5529
+ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5530
+ pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
5531
+
5532
+ *node = (pm_it_local_variable_read_node_t) {
5533
+ {
5534
+ .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5535
+ .location = PM_LOCATION_TOKEN_VALUE(name)
5536
+ }
5537
+ };
5538
+
5539
+ return node;
5540
+ }
5541
+
5465
5542
  /**
5466
5543
  * Allocate and initialize a new ItParametersNode node.
5467
5544
  */
@@ -5774,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5774
5851
  return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5775
5852
  }
5776
5853
 
5777
- /**
5778
- * Returns true if the given node is `it` default parameter.
5779
- */
5780
- static inline bool
5781
- pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
5782
- // Check if it's a local variable reference
5783
- if (node->type != PM_CALL_NODE) {
5784
- return false;
5785
- }
5786
-
5787
- // Check if it's a variable call
5788
- pm_call_node_t *call_node = (pm_call_node_t *) node;
5789
- if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
5790
- return false;
5791
- }
5792
-
5793
- // Check if it's called `it`
5794
- pm_constant_id_t id = ((pm_call_node_t *)node)->name;
5795
- pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
5796
- return pm_token_is_it(constant->start, constant->start + constant->length);
5797
- }
5798
-
5799
5854
  /**
5800
5855
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
5801
5856
  * are of the form /^_\d$/).
@@ -7355,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7355
7410
  {
7356
7411
  .type = PM_SYMBOL_NODE,
7357
7412
  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7358
- .location = { .start = parser->start, .end = parser->start }
7413
+ .location = PM_LOCATION_NULL_VALUE(parser)
7359
7414
  },
7360
- .value_loc = { .start = parser->start, .end = parser->start },
7415
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
7361
7416
  .unescaped = { 0 }
7362
7417
  };
7363
7418
 
@@ -7758,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7758
7813
  *node = (pm_while_node_t) {
7759
7814
  {
7760
7815
  .type = PM_WHILE_NODE,
7761
- .location = { .start = parser->start, .end = parser->start }
7816
+ .location = PM_LOCATION_NULL_VALUE(parser)
7762
7817
  },
7763
- .keyword_loc = { .start = parser->start, .end = parser->start },
7764
- .closing_loc = { .start = parser->start, .end = parser->start },
7818
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7819
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7765
7820
  .predicate = predicate,
7766
7821
  .statements = statements
7767
7822
  };
@@ -7916,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
7916
7971
  return constant_id;
7917
7972
  }
7918
7973
 
7919
- /**
7920
- * Create a local variable read that is reading the implicit 'it' variable.
7921
- */
7922
- static pm_local_variable_read_node_t *
7923
- pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
7924
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
7925
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
7926
- return NULL;
7927
- }
7928
-
7929
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
7930
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
7931
- return NULL;
7932
- }
7933
-
7934
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
7935
-
7936
- pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
7937
- pm_parser_local_add(parser, name_id, name->start, name->end, 0);
7938
-
7939
- return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
7940
- }
7941
-
7942
- /**
7943
- * Convert a `it` variable call node to a node for `it` default parameter.
7944
- */
7945
- static pm_node_t *
7946
- pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7947
- if (
7948
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7949
- !parser->current_scope->closed &&
7950
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7951
- pm_node_is_it(parser, node)
7952
- ) {
7953
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
7954
-
7955
- if (read != NULL) {
7956
- pm_node_destroy(parser, node);
7957
- node = (pm_node_t *) read;
7958
- }
7959
- }
7960
-
7961
- return node;
7962
- }
7963
-
7964
7974
  /**
7965
7975
  * Add a parameter name to the current scope and check whether the name of the
7966
7976
  * parameter is unique or not.
@@ -7996,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
7996
8006
  pm_scope_t *scope = parser->current_scope;
7997
8007
  parser->current_scope = scope->previous;
7998
8008
  pm_locals_free(&scope->locals);
8009
+ pm_node_list_free(&scope->implicit_parameters);
7999
8010
  xfree(scope);
8000
8011
  }
8001
8012
 
@@ -8067,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
8067
8078
  * is beyond the end of the source then return '\0'.
8068
8079
  */
8069
8080
  static inline uint8_t
8070
- peek_at(pm_parser_t *parser, const uint8_t *cursor) {
8081
+ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8071
8082
  if (cursor < parser->end) {
8072
8083
  return *cursor;
8073
8084
  } else {
@@ -8090,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8090
8101
  * that position is beyond the end of the source then return '\0'.
8091
8102
  */
8092
8103
  static inline uint8_t
8093
- peek(pm_parser_t *parser) {
8104
+ peek(const pm_parser_t *parser) {
8094
8105
  return peek_at(parser, parser->current.end);
8095
8106
  }
8096
8107
 
@@ -8155,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
8155
8166
  return memchr(cursor, '\n', (size_t) length);
8156
8167
  }
8157
8168
 
8169
+ /**
8170
+ * This is equivalent to the predicate of warn_balanced in CRuby.
8171
+ */
8172
+ static inline bool
8173
+ ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8174
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8175
+ }
8176
+
8158
8177
  /**
8159
8178
  * Here we're going to check if this is a "magic" comment, and perform whatever
8160
8179
  * actions are necessary for it here.
@@ -8995,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
8995
9014
  // If we get here, then we have a $ followed by something that
8996
9015
  // isn't recognized as a global variable.
8997
9016
  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8998
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8999
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9017
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9018
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9000
9019
  }
9001
9020
 
9002
9021
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9389,7 +9408,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9389
9408
  */
9390
9409
  static inline uint8_t
9391
9410
  escape_byte(uint8_t value, const uint8_t flags) {
9392
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
9411
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9393
9412
  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9394
9413
  return value;
9395
9414
  }
@@ -9489,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9489
9508
  static inline void
9490
9509
  escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9491
9510
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9492
- pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
9493
-
9494
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
9495
- uint8_t byte2 = (uint8_t) (byte & 0xF);
9496
-
9497
- if (byte1 >= 0xA) {
9498
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
9499
- } else {
9500
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
9501
- }
9502
-
9503
- if (byte2 >= 0xA) {
9504
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
9505
- } else {
9506
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
9507
- }
9511
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9508
9512
  }
9509
9513
 
9510
9514
  escape_write_byte_encoded(parser, buffer, byte);
@@ -9539,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9539
9543
  switch (peek(parser)) {
9540
9544
  case '\\': {
9541
9545
  parser->current.end++;
9542
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
9546
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9543
9547
  return;
9544
9548
  }
9545
9549
  case '\'': {
9546
9550
  parser->current.end++;
9547
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
9551
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9548
9552
  return;
9549
9553
  }
9550
9554
  case 'a': {
9551
9555
  parser->current.end++;
9552
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
9556
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9553
9557
  return;
9554
9558
  }
9555
9559
  case 'b': {
9556
9560
  parser->current.end++;
9557
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
9561
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9558
9562
  return;
9559
9563
  }
9560
9564
  case 'e': {
9561
9565
  parser->current.end++;
9562
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
9566
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9563
9567
  return;
9564
9568
  }
9565
9569
  case 'f': {
9566
9570
  parser->current.end++;
9567
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
9571
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9568
9572
  return;
9569
9573
  }
9570
9574
  case 'n': {
9571
9575
  parser->current.end++;
9572
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9576
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9573
9577
  return;
9574
9578
  }
9575
9579
  case 'r': {
9576
9580
  parser->current.end++;
9577
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
9581
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9578
9582
  return;
9579
9583
  }
9580
9584
  case 's': {
9581
9585
  parser->current.end++;
9582
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
9586
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9583
9587
  return;
9584
9588
  }
9585
9589
  case 't': {
9586
9590
  parser->current.end++;
9587
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
9591
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9588
9592
  return;
9589
9593
  }
9590
9594
  case 'v': {
9591
9595
  parser->current.end++;
9592
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
9596
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9593
9597
  return;
9594
9598
  }
9595
9599
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -9606,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9606
9610
  }
9607
9611
  }
9608
9612
 
9609
- escape_write_byte_encoded(parser, buffer, value);
9613
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9610
9614
  return;
9611
9615
  }
9612
9616
  case 'x': {
@@ -9625,11 +9629,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9625
9629
  parser->current.end++;
9626
9630
  }
9627
9631
 
9632
+ value = escape_byte(value, flags);
9628
9633
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9629
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9634
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9635
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9636
+ } else {
9637
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9638
+ }
9630
9639
  }
9631
9640
 
9632
- escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9641
+ escape_write_byte_encoded(parser, buffer, value);
9633
9642
  } else {
9634
9643
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9635
9644
  }
@@ -9658,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9658
9667
  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9659
9668
  } else if (hexadecimal_length == 0) {
9660
9669
  // there are no hexadecimal characters
9661
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
9670
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9671
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9662
9672
  return;
9663
9673
  }
9664
9674
 
@@ -9707,10 +9717,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9707
9717
  }
9708
9718
  }
9709
9719
 
9710
- if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
- pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9712
- }
9713
-
9714
9720
  return;
9715
9721
  }
9716
9722
  case 'c': {
@@ -9733,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9733
9739
  return;
9734
9740
  }
9735
9741
  parser->current.end++;
9742
+
9743
+ if (match(parser, 'u') || match(parser, 'U')) {
9744
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9745
+ return;
9746
+ }
9747
+
9736
9748
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9737
9749
  return;
9738
9750
  case ' ':
@@ -9760,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9760
9772
  case 'C': {
9761
9773
  parser->current.end++;
9762
9774
  if (peek(parser) != '-') {
9763
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9775
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9776
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9764
9777
  return;
9765
9778
  }
9766
9779
 
@@ -9783,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9783
9796
  return;
9784
9797
  }
9785
9798
  parser->current.end++;
9799
+
9800
+ if (match(parser, 'u') || match(parser, 'U')) {
9801
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9802
+ return;
9803
+ }
9804
+
9786
9805
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9787
9806
  return;
9788
9807
  case ' ':
@@ -9797,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9797
9816
  return;
9798
9817
  default: {
9799
9818
  if (!char_is_ascii_printable(peeked)) {
9800
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9819
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9820
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9801
9821
  return;
9802
9822
  }
9803
9823
 
@@ -9810,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9810
9830
  case 'M': {
9811
9831
  parser->current.end++;
9812
9832
  if (peek(parser) != '-') {
9813
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9833
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9834
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9814
9835
  return;
9815
9836
  }
9816
9837
 
@@ -9828,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9828
9849
  return;
9829
9850
  }
9830
9851
  parser->current.end++;
9852
+
9853
+ if (match(parser, 'u') || match(parser, 'U')) {
9854
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9855
+ return;
9856
+ }
9857
+
9831
9858
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9832
9859
  return;
9833
9860
  case ' ':
@@ -9842,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9842
9869
  return;
9843
9870
  default:
9844
9871
  if (!char_is_ascii_printable(peeked)) {
9845
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9872
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9873
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9846
9874
  return;
9847
9875
  }
9848
9876
 
@@ -10803,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
10803
10831
  type = PM_TOKEN_USTAR_STAR;
10804
10832
  } else if (lex_state_beg_p(parser)) {
10805
10833
  type = PM_TOKEN_USTAR_STAR;
10834
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10835
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10806
10836
  }
10807
10837
 
10808
10838
  if (lex_state_operator_p(parser)) {
@@ -10826,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
10826
10856
  type = PM_TOKEN_USTAR;
10827
10857
  } else if (lex_state_beg_p(parser)) {
10828
10858
  type = PM_TOKEN_USTAR;
10859
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10860
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10829
10861
  }
10830
10862
 
10831
10863
  if (lex_state_operator_p(parser)) {
@@ -10942,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
10942
10974
  // If we have quotes, then we're going to go until we find the
10943
10975
  // end quote.
10944
10976
  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10977
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10945
10978
  parser->current.end++;
10946
10979
  }
10947
10980
  }
@@ -10999,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
10999
11032
  LEX(PM_TOKEN_LESS_LESS_EQUAL);
11000
11033
  }
11001
11034
 
11035
+ if (ambiguous_operator_p(parser, space_seen)) {
11036
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11037
+ }
11038
+
11002
11039
  if (lex_state_operator_p(parser)) {
11003
11040
  lex_state_set(parser, PM_LEX_STATE_ARG);
11004
11041
  } else {
@@ -11112,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
11112
11149
  type = PM_TOKEN_UAMPERSAND;
11113
11150
  } else if (lex_state_beg_p(parser)) {
11114
11151
  type = PM_TOKEN_UAMPERSAND;
11152
+ } else if (ambiguous_operator_p(parser, space_seen)) {
11153
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11115
11154
  }
11116
11155
 
11117
11156
  if (lex_state_operator_p(parser)) {
@@ -11186,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
11186
11225
  LEX(PM_TOKEN_UPLUS);
11187
11226
  }
11188
11227
 
11228
+ if (ambiguous_operator_p(parser, space_seen)) {
11229
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11230
+ }
11231
+
11189
11232
  lex_state_set(parser, PM_LEX_STATE_BEG);
11190
11233
  LEX(PM_TOKEN_PLUS);
11191
11234
  }
@@ -11223,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
11223
11266
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11224
11267
  }
11225
11268
 
11269
+ if (ambiguous_operator_p(parser, space_seen)) {
11270
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11271
+ }
11272
+
11226
11273
  lex_state_set(parser, PM_LEX_STATE_BEG);
11227
11274
  LEX(PM_TOKEN_MINUS);
11228
11275
  }
@@ -11321,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
11321
11368
  LEX(PM_TOKEN_REGEXP_BEGIN);
11322
11369
  }
11323
11370
 
11371
+ if (ambiguous_operator_p(parser, space_seen)) {
11372
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11373
+ }
11374
+
11324
11375
  if (lex_state_operator_p(parser)) {
11325
11376
  lex_state_set(parser, PM_LEX_STATE_ARG);
11326
11377
  } else {
@@ -11356,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
11356
11407
  // operator because we don't want to move into the string
11357
11408
  // lex mode unnecessarily.
11358
11409
  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11359
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11410
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11360
11411
  LEX(PM_TOKEN_PERCENT);
11361
11412
  }
11362
11413
 
@@ -11375,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
11375
11426
 
11376
11427
  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11377
11428
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11378
-
11379
- if (parser->current.end < parser->end) {
11380
- LEX(PM_TOKEN_STRING_BEGIN);
11381
- }
11429
+ LEX(PM_TOKEN_STRING_BEGIN);
11382
11430
  }
11383
11431
 
11384
11432
  // Delimiters for %-literals cannot be alphanumeric. We
@@ -11505,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
11505
11553
  }
11506
11554
  }
11507
11555
 
11556
+ if (ambiguous_operator_p(parser, space_seen)) {
11557
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11558
+ }
11559
+
11508
11560
  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11509
11561
  LEX(PM_TOKEN_PERCENT);
11510
11562
  }
@@ -12315,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
12315
12367
 
12316
12368
  // If we are immediately following a newline and we have hit the
12317
12369
  // terminator, then we need to return the ending of the heredoc.
12318
- if (!line_continuation && current_token_starts_line(parser)) {
12370
+ if (current_token_starts_line(parser)) {
12319
12371
  const uint8_t *start = parser->current.start;
12320
- if (start + ident_length <= parser->end) {
12372
+
12373
+ if (!line_continuation && (start + ident_length <= parser->end)) {
12321
12374
  const uint8_t *newline = next_newline(start, parser->end - start);
12322
12375
  const uint8_t *ident_end = newline;
12323
12376
  const uint8_t *terminator_end = newline;
@@ -12473,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
12473
12526
  }
12474
12527
 
12475
12528
  parser->current.end = breakpoint + 1;
12476
-
12477
- if (!was_line_continuation) {
12478
- pm_token_buffer_flush(parser, &token_buffer);
12479
- LEX(PM_TOKEN_STRING_CONTENT);
12480
- }
12529
+ pm_token_buffer_flush(parser, &token_buffer);
12530
+ LEX(PM_TOKEN_STRING_CONTENT);
12481
12531
  }
12482
12532
 
12483
12533
  // Otherwise we hit a newline and it wasn't followed by
@@ -13112,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13112
13162
  return (pm_node_t *) result;
13113
13163
  }
13114
13164
 
13165
+ /**
13166
+ * When an implicit local variable is written to or targeted, it becomes a
13167
+ * regular, named local variable. This function removes it from the list of
13168
+ * implicit parameters when that happens.
13169
+ */
13170
+ static void
13171
+ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13172
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13173
+
13174
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
13175
+ if (implicit_parameters->nodes[index] == node) {
13176
+ // If the node is not the last one in the list, we need to shift the
13177
+ // remaining nodes down to fill the gap. This is extremely unlikely
13178
+ // to happen.
13179
+ if (index != implicit_parameters->size - 1) {
13180
+ memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13181
+ }
13182
+
13183
+ implicit_parameters->size--;
13184
+ break;
13185
+ }
13186
+ }
13187
+ }
13188
+
13115
13189
  /**
13116
13190
  * Convert the given node into a valid target node.
13191
+ *
13192
+ * @param multiple Whether or not this target is part of a larger set of
13193
+ * targets. If it is, then the &. operator is not allowed.
13194
+ * @param splat_parent Whether or not this target is a child of a splat target. If it
13195
+ * is, then fewer patterns are allowed.
13117
13196
  */
13118
13197
  static pm_node_t *
13119
- parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13198
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13120
13199
  switch (PM_NODE_TYPE(target)) {
13121
13200
  case PM_MISSING_NODE:
13122
13201
  return target;
@@ -13162,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13162
13241
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13163
13242
  return target;
13164
13243
  case PM_LOCAL_VARIABLE_READ_NODE: {
13165
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13244
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13245
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13246
+ parse_target_implicit_parameter(parser, target);
13247
+ }
13166
13248
 
13167
13249
  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13168
13250
  uint32_t name = cast->name;
@@ -13174,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13174
13256
 
13175
13257
  return target;
13176
13258
  }
13259
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13260
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13261
+ pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13262
+
13263
+ parse_target_implicit_parameter(parser, target);
13264
+ pm_node_destroy(parser, target);
13265
+
13266
+ return node;
13267
+ }
13177
13268
  case PM_INSTANCE_VARIABLE_READ_NODE:
13178
13269
  assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
13179
13270
  target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13180
13271
  return target;
13181
13272
  case PM_MULTI_TARGET_NODE:
13273
+ if (splat_parent) {
13274
+ // Multi target is not accepted in all positions. If this is one
13275
+ // of them, then we need to add an error.
13276
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13277
+ }
13278
+
13182
13279
  return target;
13183
13280
  case PM_SPLAT_NODE: {
13184
13281
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13185
13282
 
13186
13283
  if (splat->expression != NULL) {
13187
- splat->expression = parse_target(parser, splat->expression, multiple);
13284
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
13188
13285
  }
13189
13286
 
13190
13287
  return (pm_node_t *) splat;
@@ -13254,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13254
13351
  */
13255
13352
  static pm_node_t *
13256
13353
  parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13257
- pm_node_t *result = parse_target(parser, target, multiple);
13354
+ pm_node_t *result = parse_target(parser, target, multiple, false);
13258
13355
 
13259
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
13356
+ // Ensure that the targets are followed by one of: an =, an 'in' (when
13357
+ // parsing a for loop index), or a ')' (when inside parentheses).
13260
13358
  if (
13261
13359
  !match1(parser, PM_TOKEN_EQUAL) &&
13262
13360
  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -13326,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13326
13424
  return (pm_node_t *) node;
13327
13425
  }
13328
13426
  case PM_LOCAL_VARIABLE_READ_NODE: {
13329
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13330
13427
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
13331
13428
 
13332
13429
  pm_constant_id_t name = local_read->name;
13430
+ pm_location_t name_loc = target->location;
13431
+
13333
13432
  uint32_t depth = local_read->depth;
13334
- pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13433
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13335
13434
 
13336
- pm_location_t name_loc = target->location;
13435
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13436
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13437
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13438
+ parse_target_implicit_parameter(parser, target);
13439
+ }
13440
+
13441
+ pm_locals_unread(&scope->locals, name);
13337
13442
  pm_node_destroy(parser, target);
13338
13443
 
13339
13444
  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13340
13445
  }
13446
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13447
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13448
+ pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13449
+
13450
+ parse_target_implicit_parameter(parser, target);
13451
+ pm_node_destroy(parser, target);
13452
+
13453
+ return node;
13454
+ }
13341
13455
  case PM_INSTANCE_VARIABLE_READ_NODE: {
13342
13456
  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13343
13457
  pm_node_destroy(parser, target);
@@ -13491,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13491
13605
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13492
13606
 
13493
13607
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13494
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13608
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13495
13609
 
13496
13610
  while (accept1(parser, PM_TOKEN_COMMA)) {
13497
13611
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13507,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13507
13621
 
13508
13622
  if (token_begins_expression_p(parser->current.type)) {
13509
13623
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13510
- name = parse_target(parser, name, true);
13624
+ name = parse_target(parser, name, true, true);
13511
13625
  }
13512
13626
 
13513
13627
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13515,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13515
13629
  has_rest = true;
13516
13630
  } else if (token_begins_expression_p(parser->current.type)) {
13517
13631
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13518
- target = parse_target(parser, target, true);
13632
+ target = parse_target(parser, target, true, false);
13519
13633
 
13520
13634
  pm_multi_target_node_targets_append(parser, result, target);
13521
13635
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13552,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
13552
13666
  */
13553
13667
  static pm_statements_node_t *
13554
13668
  parse_statements(pm_parser_t *parser, pm_context_t context) {
13555
- // First, skip past any optional terminators that might be at the beginning of
13556
- // the statements.
13669
+ // First, skip past any optional terminators that might be at the beginning
13670
+ // of the statements.
13557
13671
  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13558
13672
 
13559
13673
  // If we have a terminator, then we can just return NULL.
@@ -13569,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13569
13683
  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13570
13684
  pm_statements_node_body_append(parser, statements, node);
13571
13685
 
13572
- // If we're recovering from a syntax error, then we need to stop parsing the
13573
- // statements now.
13686
+ // If we're recovering from a syntax error, then we need to stop parsing
13687
+ // the statements now.
13574
13688
  if (parser->recovering) {
13575
- // If this is the level of context where the recovery has happened, then
13576
- // we can mark the parser as done recovering.
13689
+ // If this is the level of context where the recovery has happened,
13690
+ // then we can mark the parser as done recovering.
13577
13691
  if (context_terminator(context, &parser->current)) parser->recovering = false;
13578
13692
  break;
13579
13693
  }
13580
13694
 
13581
- // If we have a terminator, then we will parse all consecutive terminators
13582
- // and then continue parsing the statements list.
13695
+ // If we have a terminator, then we will parse all consecutive
13696
+ // terminators and then continue parsing the statements list.
13583
13697
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13584
- // If we have a terminator, then we will continue parsing the statements
13585
- // list.
13698
+ // If we have a terminator, then we will continue parsing the
13699
+ // statements list.
13586
13700
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13587
13701
  if (context_terminator(context, &parser->current)) break;
13588
13702
 
@@ -13590,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13590
13704
  continue;
13591
13705
  }
13592
13706
 
13593
- // At this point we have a list of statements that are not terminated by a
13594
- // newline or semicolon. At this point we need to check if we're at the end
13595
- // of the statements list. If we are, then we should break out of the loop.
13707
+ // At this point we have a list of statements that are not terminated by
13708
+ // a newline or semicolon. At this point we need to check if we're at
13709
+ // the end of the statements list. If we are, then we should break out
13710
+ // of the loop.
13596
13711
  if (context_terminator(context, &parser->current)) break;
13597
13712
 
13598
13713
  // At this point, we have a syntax error, because the statement was not
13599
13714
  // terminated by a newline or semicolon, and we're not at the end of the
13600
- // statements list. Ideally we should scan forward to determine if we should
13601
- // insert a missing terminator or break out of parsing the statements list
13602
- // at this point.
13715
+ // statements list. Ideally we should scan forward to determine if we
13716
+ // should insert a missing terminator or break out of parsing the
13717
+ // statements list at this point.
13603
13718
  //
13604
- // We don't have that yet, so instead we'll do a more naive approach. If we
13605
- // were unable to parse an expression, then we will skip past this token and
13606
- // continue parsing the statements list. Otherwise we'll add an error and
13607
- // continue parsing the statements list.
13719
+ // We don't have that yet, so instead we'll do a more naive approach. If
13720
+ // we were unable to parse an expression, then we will skip past this
13721
+ // token and continue parsing the statements list. Otherwise we'll add
13722
+ // an error and continue parsing the statements list.
13608
13723
  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13609
13724
  parser_lex(parser);
13610
13725
 
13611
13726
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13612
13727
  if (context_terminator(context, &parser->current)) break;
13613
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
13728
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13614
13729
  // This is an inlined version of accept1 because the error that we
13615
13730
  // want to add has varargs. If this happens again, we should
13616
13731
  // probably extract a helper function.
@@ -13632,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13632
13747
  */
13633
13748
  static void
13634
13749
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13635
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13750
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13636
13751
 
13637
13752
  if (duplicated != NULL) {
13638
13753
  pm_buffer_t buffer = { 0 };
@@ -13658,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13658
13773
  */
13659
13774
  static void
13660
13775
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13661
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13776
+ pm_node_t *previous;
13777
+
13778
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13662
13779
  pm_diagnostic_list_append_format(
13663
13780
  &parser->warning_list,
13664
13781
  node->location.start,
13665
13782
  node->location.end,
13666
13783
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13667
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13784
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13785
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13668
13786
  );
13669
13787
  }
13670
13788
  }
@@ -14276,7 +14394,7 @@ parse_parameters(
14276
14394
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14277
14395
 
14278
14396
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14279
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14397
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14280
14398
 
14281
14399
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
14282
14400
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
@@ -14289,7 +14407,7 @@ parse_parameters(
14289
14407
  // If the value of the parameter increased the number of
14290
14408
  // reads of that parameter, then we need to warn that we
14291
14409
  // have a circular definition.
14292
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14410
+ if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14293
14411
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14294
14412
  }
14295
14413
 
@@ -14368,10 +14486,10 @@ parse_parameters(
14368
14486
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14369
14487
 
14370
14488
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14371
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14489
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14372
14490
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
14373
14491
 
14374
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14492
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14375
14493
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14376
14494
  }
14377
14495
 
@@ -14543,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14543
14661
  pm_rescue_node_operator_set(rescue, &parser->previous);
14544
14662
 
14545
14663
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14546
- reference = parse_target(parser, reference, false);
14664
+ reference = parse_target(parser, reference, false, false);
14547
14665
 
14548
14666
  pm_rescue_node_reference_set(rescue, reference);
14549
14667
  break;
@@ -14573,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14573
14691
  pm_rescue_node_operator_set(rescue, &parser->previous);
14574
14692
 
14575
14693
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14576
- reference = parse_target(parser, reference, false);
14694
+ reference = parse_target(parser, reference, false, false);
14577
14695
 
14578
14696
  pm_rescue_node_reference_set(rescue, reference);
14579
14697
  break;
@@ -14778,6 +14896,28 @@ parse_block_parameters(
14778
14896
  return block_parameters;
14779
14897
  }
14780
14898
 
14899
+ /**
14900
+ * Return true if any of the visible scopes to the current context are using
14901
+ * numbered parameters.
14902
+ */
14903
+ static bool
14904
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14905
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14906
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14907
+ }
14908
+
14909
+ return false;
14910
+ }
14911
+
14912
+ /**
14913
+ * These are the names of the various numbered parameters. We have them here so
14914
+ * that when we insert them into the constant pool we can use a constant string
14915
+ * and not have to allocate.
14916
+ */
14917
+ static const char * const pm_numbered_parameter_names[] = {
14918
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
14919
+ };
14920
+
14781
14921
  /**
14782
14922
  * Return the node that should be used in the parameters field of a block-like
14783
14923
  * (block or lambda) node, depending on the kind of parameters that were
@@ -14785,31 +14925,79 @@ parse_block_parameters(
14785
14925
  */
14786
14926
  static pm_node_t *
14787
14927
  parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14788
- uint8_t masked = parser->current_scope->parameters & PM_SCOPE_PARAMETERS_TYPE_MASK;
14928
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14929
+
14930
+ // If we have ordinary parameters, then we will return them as the set of
14931
+ // parameters.
14932
+ if (parameters != NULL) {
14933
+ // If we also have implicit parameters, then this is an error.
14934
+ if (implicit_parameters->size > 0) {
14935
+ pm_node_t *node = implicit_parameters->nodes[0];
14936
+
14937
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14938
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14939
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14940
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14941
+ } else {
14942
+ assert(false && "unreachable");
14943
+ }
14944
+ }
14789
14945
 
14790
- if (masked == PM_SCOPE_PARAMETERS_NONE) {
14791
- assert(parameters == NULL);
14792
- return NULL;
14793
- } else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
14794
- assert(parameters != NULL);
14795
14946
  return parameters;
14796
- } else if (masked == PM_SCOPE_PARAMETERS_NUMBERED) {
14797
- assert(parameters == NULL);
14947
+ }
14948
+
14949
+ // If we don't have any implicit parameters, then the set of parameters is
14950
+ // NULL.
14951
+ if (implicit_parameters->size == 0) {
14952
+ return NULL;
14953
+ }
14954
+
14955
+ // If we don't have ordinary parameters, then we now must validate our set
14956
+ // of implicit parameters. We can only have numbered parameters or it, but
14957
+ // they cannot be mixed.
14958
+ uint8_t numbered_parameter = 0;
14959
+ bool it_parameter = false;
14960
+
14961
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
14962
+ pm_node_t *node = implicit_parameters->nodes[index];
14963
+
14964
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14965
+ if (it_parameter) {
14966
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14967
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
14968
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14969
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14970
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14971
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14972
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14973
+ } else {
14974
+ assert(false && "unreachable");
14975
+ }
14976
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14977
+ if (numbered_parameter > 0) {
14978
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14979
+ } else {
14980
+ it_parameter = true;
14981
+ }
14982
+ }
14983
+ }
14798
14984
 
14799
- int8_t maximum = parser->current_scope->numbered_parameters;
14800
- if (maximum > 0) {
14801
- const pm_location_t location = { .start = opening->start, .end = closing->end };
14802
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, (uint8_t) maximum);
14985
+ if (numbered_parameter > 0) {
14986
+ // Go through the parent scopes and mark them as being disallowed from
14987
+ // using numbered parameters because this inner scope is using them.
14988
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14989
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14803
14990
  }
14804
14991
 
14805
- return NULL;
14806
- } else if (masked == PM_SCOPE_PARAMETERS_IT) {
14807
- assert(parameters == NULL);
14992
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
14993
+ return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
14994
+ }
14995
+
14996
+ if (it_parameter) {
14808
14997
  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
14809
- } else {
14810
- assert(false && "unreachable");
14811
- return NULL;
14812
14998
  }
14999
+
15000
+ return NULL;
14813
15001
  }
14814
15002
 
14815
15003
  /**
@@ -14826,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
14826
15014
  pm_block_parameters_node_t *block_parameters = NULL;
14827
15015
 
14828
15016
  if (accept1(parser, PM_TOKEN_PIPE)) {
14829
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
14830
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
14831
-
14832
15017
  pm_token_t block_parameters_opening = parser->previous;
14833
15018
  if (match1(parser, PM_TOKEN_PIPE)) {
14834
15019
  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
@@ -15326,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
15326
15511
  #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15327
15512
  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15328
15513
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15329
- case PM_NUMBERED_REFERENCE_READ_NODE
15514
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15330
15515
 
15331
15516
  // Assert here that the flags are the same so that we can safely switch the type
15332
15517
  // of the node without having to move the flags.
@@ -15384,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
15384
15569
  // "aaa #{bbb} #@ccc ddd"
15385
15570
  // ^^^^^^
15386
15571
  case PM_TOKEN_EMBEXPR_BEGIN: {
15572
+ // Ruby disallows seeing encoding around interpolation in strings,
15573
+ // even though it is known at parse time.
15574
+ parser->explicit_encoding = NULL;
15575
+
15387
15576
  pm_lex_state_t state = parser->lex_state;
15388
15577
  int brace_nesting = parser->brace_nesting;
15389
15578
 
@@ -15406,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
15406
15595
  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15407
15596
  pm_token_t closing = parser->previous;
15408
15597
 
15598
+ // If this set of embedded statements only contains a single
15599
+ // statement, then Ruby does not consider it as a possible statement
15600
+ // that could emit a line event.
15601
+ if (statements != NULL && statements->body.size == 1) {
15602
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15603
+ }
15604
+
15409
15605
  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
15410
15606
  }
15411
15607
 
@@ -15416,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
15416
15612
  // "aaa #{bbb} #@ccc ddd"
15417
15613
  // ^^^^^
15418
15614
  case PM_TOKEN_EMBVAR: {
15615
+ // Ruby disallows seeing encoding around interpolation in strings,
15616
+ // even though it is known at parse time.
15617
+ parser->explicit_encoding = NULL;
15618
+
15419
15619
  lex_state_set(parser, PM_LEX_STATE_BEG);
15420
15620
  parser_lex(parser);
15421
15621
 
@@ -15731,74 +15931,43 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15731
15931
  }
15732
15932
 
15733
15933
  /**
15734
- * Return true if any of the visible scopes to the current context are using
15735
- * numbered parameters.
15934
+ * Parse an identifier into either a local variable read. If the local variable
15935
+ * is not found, it returns NULL instead.
15736
15936
  */
15737
- static bool
15738
- outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15739
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15740
- if (scope->numbered_parameters > 0) return true;
15741
- }
15937
+ static pm_node_t *
15938
+ parse_variable(pm_parser_t *parser) {
15939
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15940
+ int depth;
15742
15941
 
15743
- return false;
15744
- }
15745
-
15746
- /**
15747
- * These are the names of the various numbered parameters. We have them here so
15748
- * that when we insert them into the constant pool we can use a constant string
15749
- * and not have to allocate.
15750
- */
15751
- static const char * const pm_numbered_parameter_names[] = {
15752
- "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
15753
- };
15754
-
15755
- /**
15756
- * Parse an identifier into either a local variable read. If the local variable
15757
- * is not found, it returns NULL instead.
15758
- */
15759
- static pm_local_variable_read_node_t *
15760
- parse_variable(pm_parser_t *parser) {
15761
- int depth;
15762
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
15763
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
15764
- }
15942
+ if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
15943
+ return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
15944
+ }
15765
15945
 
15766
15946
  pm_scope_t *current_scope = parser->current_scope;
15767
- if (!current_scope->closed && current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15768
- // Now that we know we have a numbered parameter, we need to check
15769
- // if it's allowed in this context. If it is, then we will create a
15770
- // local variable read. If it's not, then we'll create a normal call
15771
- // node but add an error.
15772
- if (current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
15773
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15774
- } else if (current_scope->parameters & PM_SCOPE_PARAMETERS_IT) {
15775
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_IT);
15776
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
15777
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
15778
- } else {
15779
- // Indicate that this scope is using numbered params so that child
15780
- // scopes cannot. We subtract the value for the character '0' to get
15781
- // the actual integer value of the number (only _1 through _9 are
15782
- // valid).
15783
- int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
15784
- current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
15785
-
15786
- if (numbered_parameters > current_scope->numbered_parameters) {
15787
- current_scope->numbered_parameters = numbered_parameters;
15947
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15948
+ if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15949
+ // When you use a numbered parameter, it implies the existence of
15950
+ // all of the locals that exist before it. For example, referencing
15951
+ // _2 means that _1 must exist. Therefore here we loop through all
15952
+ // of the possibilities and add them into the constant pool.
15953
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15954
+ for (uint8_t number = 1; number <= maximum; number++) {
15955
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15788
15956
  }
15789
15957
 
15790
- // When you use a numbered parameter, it implies the existence
15791
- // of all of the locals that exist before it. For example,
15792
- // referencing _2 means that _1 must exist. Therefore here we
15793
- // loop through all of the possibilities and add them into the
15794
- // constant pool.
15795
- for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
15796
- pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
15958
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
15959
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15797
15960
  }
15798
15961
 
15799
- // Finally we can create the local variable read node.
15800
- pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
15801
- return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15962
+ pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15963
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15964
+
15965
+ return node;
15966
+ } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15967
+ pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
15968
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15969
+
15970
+ return node;
15802
15971
  }
15803
15972
  }
15804
15973
 
@@ -15813,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
15813
15982
  pm_node_flags_t flags = 0;
15814
15983
 
15815
15984
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15816
- pm_local_variable_read_node_t *node = parse_variable(parser);
15817
- if (node != NULL) return (pm_node_t *) node;
15985
+ pm_node_t *node = parse_variable(parser);
15986
+ if (node != NULL) return node;
15818
15987
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15819
15988
  }
15820
15989
 
@@ -15932,6 +16101,230 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15932
16101
  nodes->size = write_index;
15933
16102
  }
15934
16103
 
16104
+ /**
16105
+ * Return a string content token at a particular location that is empty.
16106
+ */
16107
+ static pm_token_t
16108
+ parse_strings_empty_content(const uint8_t *location) {
16109
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16110
+ }
16111
+
16112
+ /**
16113
+ * Parse a set of strings that could be concatenated together.
16114
+ */
16115
+ static inline pm_node_t *
16116
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16117
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16118
+
16119
+ bool concating = false;
16120
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16121
+
16122
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16123
+ pm_node_t *node = NULL;
16124
+
16125
+ // Here we have found a string literal. We'll parse it and add it to
16126
+ // the list of strings.
16127
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16128
+ assert(lex_mode->mode == PM_LEX_STRING);
16129
+ bool lex_interpolation = lex_mode->as.string.interpolation;
16130
+
16131
+ pm_token_t opening = parser->current;
16132
+ parser_lex(parser);
16133
+
16134
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16135
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16136
+ // If we get here, then we have an end immediately after a
16137
+ // start. In that case we'll create an empty content token and
16138
+ // return an uninterpolated string.
16139
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16140
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16141
+
16142
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
16143
+ node = (pm_node_t *) string;
16144
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16145
+ // If we get here, then we have an end of a label immediately
16146
+ // after a start. In that case we'll create an empty symbol
16147
+ // node.
16148
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16149
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16150
+
16151
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16152
+ node = (pm_node_t *) symbol;
16153
+ } else if (!lex_interpolation) {
16154
+ // If we don't accept interpolation then we expect the string to
16155
+ // start with a single string content node.
16156
+ pm_string_t unescaped;
16157
+ pm_token_t content;
16158
+
16159
+ if (match1(parser, PM_TOKEN_EOF)) {
16160
+ unescaped = PM_STRING_EMPTY;
16161
+ content = not_provided(parser);
16162
+ } else {
16163
+ unescaped = parser->current_string;
16164
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16165
+ content = parser->previous;
16166
+ }
16167
+
16168
+ // It is unfortunately possible to have multiple string content
16169
+ // nodes in a row in the case that there's heredoc content in
16170
+ // the middle of the string, like this cursed example:
16171
+ //
16172
+ // <<-END+'b
16173
+ // a
16174
+ // END
16175
+ // c'+'d'
16176
+ //
16177
+ // In that case we need to switch to an interpolated string to
16178
+ // be able to contain all of the parts.
16179
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16180
+ pm_node_list_t parts = { 0 };
16181
+
16182
+ pm_token_t delimiters = not_provided(parser);
16183
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16184
+ pm_node_list_append(&parts, part);
16185
+
16186
+ do {
16187
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16188
+ pm_node_list_append(&parts, part);
16189
+ parser_lex(parser);
16190
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16191
+
16192
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16193
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16194
+
16195
+ pm_node_list_free(&parts);
16196
+ } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16197
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16198
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16199
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16200
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16201
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16202
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16203
+ } else {
16204
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16205
+ parser->previous.start = parser->previous.end;
16206
+ parser->previous.type = PM_TOKEN_MISSING;
16207
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16208
+ }
16209
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16210
+ // In this case we've hit string content so we know the string
16211
+ // at least has something in it. We'll need to check if the
16212
+ // following token is the end (in which case we can return a
16213
+ // plain string) or if it's not then it has interpolation.
16214
+ pm_token_t content = parser->current;
16215
+ pm_string_t unescaped = parser->current_string;
16216
+ parser_lex(parser);
16217
+
16218
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16219
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16220
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
16221
+
16222
+ // Kind of odd behavior, but basically if we have an
16223
+ // unterminated string and it ends in a newline, we back up one
16224
+ // character so that the error message is on the last line of
16225
+ // content in the string.
16226
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
16227
+ const uint8_t *location = parser->previous.end;
16228
+ if (location > parser->start && location[-1] == '\n') location--;
16229
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16230
+
16231
+ parser->previous.start = parser->previous.end;
16232
+ parser->previous.type = PM_TOKEN_MISSING;
16233
+ }
16234
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16235
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16236
+ } else {
16237
+ // If we get here, then we have interpolation so we'll need
16238
+ // to create a string or symbol node with interpolation.
16239
+ pm_node_list_t parts = { 0 };
16240
+ pm_token_t string_opening = not_provided(parser);
16241
+ pm_token_t string_closing = not_provided(parser);
16242
+
16243
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16244
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
16245
+ pm_node_list_append(&parts, part);
16246
+
16247
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16248
+ if ((part = parse_string_part(parser)) != NULL) {
16249
+ pm_node_list_append(&parts, part);
16250
+ }
16251
+ }
16252
+
16253
+ if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16254
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16255
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16256
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16257
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16258
+ } else {
16259
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16260
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16261
+ }
16262
+
16263
+ pm_node_list_free(&parts);
16264
+ }
16265
+ } else {
16266
+ // If we get here, then the first part of the string is not plain
16267
+ // string content, in which case we need to parse the string as an
16268
+ // interpolated string.
16269
+ pm_node_list_t parts = { 0 };
16270
+ pm_node_t *part;
16271
+
16272
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16273
+ if ((part = parse_string_part(parser)) != NULL) {
16274
+ pm_node_list_append(&parts, part);
16275
+ }
16276
+ }
16277
+
16278
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
16279
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16280
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16281
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16282
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16283
+ } else {
16284
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16285
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16286
+ }
16287
+
16288
+ pm_node_list_free(&parts);
16289
+ }
16290
+
16291
+ if (current == NULL) {
16292
+ // If the node we just parsed is a symbol node, then we can't
16293
+ // concatenate it with anything else, so we can now return that
16294
+ // node.
16295
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16296
+ return node;
16297
+ }
16298
+
16299
+ // If we don't already have a node, then it's fine and we can just
16300
+ // set the result to be the node we just parsed.
16301
+ current = node;
16302
+ } else {
16303
+ // Otherwise we need to check the type of the node we just parsed.
16304
+ // If it cannot be concatenated with the previous node, then we'll
16305
+ // need to add a syntax error.
16306
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16307
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16308
+ }
16309
+
16310
+ // If we haven't already created our container for concatenation,
16311
+ // we'll do that now.
16312
+ if (!concating) {
16313
+ concating = true;
16314
+ pm_token_t bounds = not_provided(parser);
16315
+
16316
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16317
+ pm_interpolated_string_node_append(container, current);
16318
+ current = (pm_node_t *) container;
16319
+ }
16320
+
16321
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16322
+ }
16323
+ }
16324
+
16325
+ return current;
16326
+ }
16327
+
15935
16328
  #define PM_PARSE_PATTERN_SINGLE 0
15936
16329
  #define PM_PARSE_PATTERN_TOP 1
15937
16330
  #define PM_PARSE_PATTERN_MULTI 2
@@ -16214,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
16214
16607
  */
16215
16608
  static void
16216
16609
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16217
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
16610
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16218
16611
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16219
16612
  }
16220
16613
  }
@@ -16289,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16289
16682
  pm_node_list_append(&assocs, assoc);
16290
16683
  }
16291
16684
  } else {
16292
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16293
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16685
+ pm_node_t *key;
16686
+
16687
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16688
+ key = parse_strings(parser, NULL);
16689
+
16690
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16691
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16692
+ } else if (!pm_symbol_node_label_p(key)) {
16693
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16694
+ }
16695
+ } else {
16696
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16697
+ key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16698
+ }
16294
16699
 
16295
16700
  parse_pattern_hash_key(parser, &keys, key);
16296
16701
  pm_node_t *value = NULL;
@@ -16502,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16502
16907
  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
16503
16908
 
16504
16909
  if (variable == NULL) {
16505
- if (
16506
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
16507
- !parser->current_scope->closed &&
16508
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
16509
- pm_token_is_it(parser->previous.start, parser->previous.end)
16510
- ) {
16511
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
16512
- if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
16513
- variable = (pm_node_t *) read;
16514
- } else {
16515
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16516
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16517
- }
16910
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16911
+ variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16518
16912
  }
16519
16913
 
16520
16914
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
@@ -16762,276 +17156,67 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
16762
17156
  }
16763
17157
 
16764
17158
  trailing_rest = true;
16765
- } else {
16766
- node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
16767
- }
16768
-
16769
- pm_node_list_append(&nodes, node);
16770
- }
16771
-
16772
- // If the first pattern and the last pattern are rest patterns, then we will
16773
- // call this a find pattern, regardless of how many rest patterns are in
16774
- // between because we know we already added the appropriate errors.
16775
- // Otherwise we will create an array pattern.
16776
- if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
16777
- node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
16778
- } else {
16779
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
16780
- }
16781
-
16782
- xfree(nodes.nodes);
16783
- } else if (leading_rest) {
16784
- // Otherwise, if we parsed a single splat pattern, then we know we have an
16785
- // array pattern, so we can go ahead and create that node.
16786
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
16787
- }
16788
-
16789
- return node;
16790
- }
16791
-
16792
- /**
16793
- * Incorporate a negative sign into a numeric node by subtracting 1 character
16794
- * from its start bounds. If it's a compound node, then we will recursively
16795
- * apply this function to its value.
16796
- */
16797
- static inline void
16798
- parse_negative_numeric(pm_node_t *node) {
16799
- switch (PM_NODE_TYPE(node)) {
16800
- case PM_INTEGER_NODE: {
16801
- pm_integer_node_t *cast = (pm_integer_node_t *) node;
16802
- cast->base.location.start--;
16803
- cast->value.negative = true;
16804
- break;
16805
- }
16806
- case PM_FLOAT_NODE: {
16807
- pm_float_node_t *cast = (pm_float_node_t *) node;
16808
- cast->base.location.start--;
16809
- cast->value = -cast->value;
16810
- break;
16811
- }
16812
- case PM_RATIONAL_NODE:
16813
- node->location.start--;
16814
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
16815
- break;
16816
- case PM_IMAGINARY_NODE:
16817
- node->location.start--;
16818
- parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
16819
- break;
16820
- default:
16821
- assert(false && "unreachable");
16822
- break;
16823
- }
16824
- }
16825
-
16826
- /**
16827
- * Return a string content token at a particular location that is empty.
16828
- */
16829
- static pm_token_t
16830
- parse_strings_empty_content(const uint8_t *location) {
16831
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16832
- }
16833
-
16834
- /**
16835
- * Parse a set of strings that could be concatenated together.
16836
- */
16837
- static inline pm_node_t *
16838
- parse_strings(pm_parser_t *parser, pm_node_t *current) {
16839
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16840
-
16841
- bool concating = false;
16842
- bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16843
-
16844
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16845
- pm_node_t *node = NULL;
16846
-
16847
- // Here we have found a string literal. We'll parse it and add it to
16848
- // the list of strings.
16849
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16850
- assert(lex_mode->mode == PM_LEX_STRING);
16851
- bool lex_interpolation = lex_mode->as.string.interpolation;
16852
-
16853
- pm_token_t opening = parser->current;
16854
- parser_lex(parser);
16855
-
16856
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16857
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16858
- // If we get here, then we have an end immediately after a
16859
- // start. In that case we'll create an empty content token and
16860
- // return an uninterpolated string.
16861
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16862
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16863
-
16864
- pm_string_shared_init(&string->unescaped, content.start, content.end);
16865
- node = (pm_node_t *) string;
16866
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16867
- // If we get here, then we have an end of a label immediately
16868
- // after a start. In that case we'll create an empty symbol
16869
- // node.
16870
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16871
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16872
-
16873
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16874
- node = (pm_node_t *) symbol;
16875
- } else if (!lex_interpolation) {
16876
- // If we don't accept interpolation then we expect the string to
16877
- // start with a single string content node.
16878
- pm_string_t unescaped;
16879
- pm_token_t content;
16880
- if (match1(parser, PM_TOKEN_EOF)) {
16881
- unescaped = PM_STRING_EMPTY;
16882
- content = not_provided(parser);
16883
- } else {
16884
- unescaped = parser->current_string;
16885
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16886
- content = parser->previous;
16887
- }
16888
-
16889
- // It is unfortunately possible to have multiple string content
16890
- // nodes in a row in the case that there's heredoc content in
16891
- // the middle of the string, like this cursed example:
16892
- //
16893
- // <<-END+'b
16894
- // a
16895
- // END
16896
- // c'+'d'
16897
- //
16898
- // In that case we need to switch to an interpolated string to
16899
- // be able to contain all of the parts.
16900
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16901
- pm_node_list_t parts = { 0 };
16902
-
16903
- pm_token_t delimiters = not_provided(parser);
16904
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16905
- pm_node_list_append(&parts, part);
16906
-
16907
- do {
16908
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16909
- pm_node_list_append(&parts, part);
16910
- parser_lex(parser);
16911
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16912
-
16913
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16914
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16915
-
16916
- pm_node_list_free(&parts);
16917
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16918
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16919
- } else if (match1(parser, PM_TOKEN_EOF)) {
16920
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16921
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16922
- } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16923
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16924
- } else {
16925
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16926
- parser->previous.start = parser->previous.end;
16927
- parser->previous.type = PM_TOKEN_MISSING;
16928
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16929
- }
16930
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16931
- // In this case we've hit string content so we know the string
16932
- // at least has something in it. We'll need to check if the
16933
- // following token is the end (in which case we can return a
16934
- // plain string) or if it's not then it has interpolation.
16935
- pm_token_t content = parser->current;
16936
- pm_string_t unescaped = parser->current_string;
16937
- parser_lex(parser);
16938
-
16939
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16940
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16941
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
16942
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16943
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16944
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16945
- } else {
16946
- // If we get here, then we have interpolation so we'll need
16947
- // to create a string or symbol node with interpolation.
16948
- pm_node_list_t parts = { 0 };
16949
- pm_token_t string_opening = not_provided(parser);
16950
- pm_token_t string_closing = not_provided(parser);
16951
-
16952
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16953
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
16954
- pm_node_list_append(&parts, part);
16955
-
16956
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16957
- if ((part = parse_string_part(parser)) != NULL) {
16958
- pm_node_list_append(&parts, part);
16959
- }
16960
- }
16961
-
16962
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16963
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16964
- } else if (match1(parser, PM_TOKEN_EOF)) {
16965
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16966
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16967
- } else {
16968
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16969
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16970
- }
16971
-
16972
- pm_node_list_free(&parts);
16973
- }
16974
- } else {
16975
- // If we get here, then the first part of the string is not plain
16976
- // string content, in which case we need to parse the string as an
16977
- // interpolated string.
16978
- pm_node_list_t parts = { 0 };
16979
- pm_node_t *part;
16980
-
16981
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16982
- if ((part = parse_string_part(parser)) != NULL) {
16983
- pm_node_list_append(&parts, part);
16984
- }
16985
- }
16986
-
16987
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
16988
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16989
- } else if (match1(parser, PM_TOKEN_EOF)) {
16990
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16991
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16992
- } else {
16993
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16994
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
17159
+ } else {
17160
+ node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
16995
17161
  }
16996
17162
 
16997
- pm_node_list_free(&parts);
17163
+ pm_node_list_append(&nodes, node);
16998
17164
  }
16999
17165
 
17000
- if (current == NULL) {
17001
- // If the node we just parsed is a symbol node, then we can't
17002
- // concatenate it with anything else, so we can now return that
17003
- // node.
17004
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
17005
- return node;
17006
- }
17007
-
17008
- // If we don't already have a node, then it's fine and we can just
17009
- // set the result to be the node we just parsed.
17010
- current = node;
17166
+ // If the first pattern and the last pattern are rest patterns, then we will
17167
+ // call this a find pattern, regardless of how many rest patterns are in
17168
+ // between because we know we already added the appropriate errors.
17169
+ // Otherwise we will create an array pattern.
17170
+ if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17171
+ node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17011
17172
  } else {
17012
- // Otherwise we need to check the type of the node we just parsed.
17013
- // If it cannot be concatenated with the previous node, then we'll
17014
- // need to add a syntax error.
17015
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
17016
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
17017
- }
17173
+ node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17174
+ }
17018
17175
 
17019
- // If we haven't already created our container for concatenation,
17020
- // we'll do that now.
17021
- if (!concating) {
17022
- concating = true;
17023
- pm_token_t bounds = not_provided(parser);
17176
+ xfree(nodes.nodes);
17177
+ } else if (leading_rest) {
17178
+ // Otherwise, if we parsed a single splat pattern, then we know we have an
17179
+ // array pattern, so we can go ahead and create that node.
17180
+ node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17181
+ }
17024
17182
 
17025
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
17026
- pm_interpolated_string_node_append(container, current);
17027
- current = (pm_node_t *) container;
17028
- }
17183
+ return node;
17184
+ }
17029
17185
 
17030
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
17186
+ /**
17187
+ * Incorporate a negative sign into a numeric node by subtracting 1 character
17188
+ * from its start bounds. If it's a compound node, then we will recursively
17189
+ * apply this function to its value.
17190
+ */
17191
+ static inline void
17192
+ parse_negative_numeric(pm_node_t *node) {
17193
+ switch (PM_NODE_TYPE(node)) {
17194
+ case PM_INTEGER_NODE: {
17195
+ pm_integer_node_t *cast = (pm_integer_node_t *) node;
17196
+ cast->base.location.start--;
17197
+ cast->value.negative = true;
17198
+ break;
17199
+ }
17200
+ case PM_FLOAT_NODE: {
17201
+ pm_float_node_t *cast = (pm_float_node_t *) node;
17202
+ cast->base.location.start--;
17203
+ cast->value = -cast->value;
17204
+ break;
17205
+ }
17206
+ case PM_RATIONAL_NODE: {
17207
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
17208
+ cast->base.location.start--;
17209
+ cast->numerator.negative = true;
17210
+ break;
17031
17211
  }
17212
+ case PM_IMAGINARY_NODE:
17213
+ node->location.start--;
17214
+ parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17215
+ break;
17216
+ default:
17217
+ assert(false && "unreachable");
17218
+ break;
17032
17219
  }
17033
-
17034
- return current;
17035
17220
  }
17036
17221
 
17037
17222
  /**
@@ -17229,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17229
17414
  }
17230
17415
  }
17231
17416
 
17417
+ /**
17418
+ * This struct is used to pass information between the regular expression parser
17419
+ * and the error callback.
17420
+ */
17421
+ typedef struct {
17422
+ /** The parser that we are parsing the regular expression for. */
17423
+ pm_parser_t *parser;
17424
+
17425
+ /** The start of the regular expression. */
17426
+ const uint8_t *start;
17427
+
17428
+ /** The end of the regular expression. */
17429
+ const uint8_t *end;
17430
+
17431
+ /**
17432
+ * Whether or not the source of the regular expression is shared. This
17433
+ * impacts the location of error messages, because if it is shared then we
17434
+ * can use the location directly and if it is not, then we use the bounds of
17435
+ * the regular expression itself.
17436
+ */
17437
+ bool shared;
17438
+ } parse_regular_expression_error_data_t;
17439
+
17440
+ /**
17441
+ * This callback is called when the regular expression parser encounters a
17442
+ * syntax error.
17443
+ */
17444
+ static void
17445
+ parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17446
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
17447
+ pm_location_t location;
17448
+
17449
+ if (callback_data->shared) {
17450
+ location = (pm_location_t) { .start = start, .end = end };
17451
+ } else {
17452
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17453
+ }
17454
+
17455
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17456
+ }
17457
+
17458
+ /**
17459
+ * Parse the errors for the regular expression and add them to the parser.
17460
+ */
17461
+ static void
17462
+ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17463
+ const pm_string_t *unescaped = &node->unescaped;
17464
+ parse_regular_expression_error_data_t error_data = {
17465
+ .parser = parser,
17466
+ .start = node->base.location.start,
17467
+ .end = node->base.location.end,
17468
+ .shared = unescaped->type == PM_STRING_SHARED
17469
+ };
17470
+
17471
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
17472
+ }
17473
+
17232
17474
  /**
17233
17475
  * Parse an expression that begins with the previous node that we just lexed.
17234
17476
  */
@@ -17249,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17249
17491
  break;
17250
17492
  }
17251
17493
 
17252
- if (pm_array_node_size(array) != 0) {
17253
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
17494
+ // Ensure that we have a comma between elements in the array.
17495
+ if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17496
+ const uint8_t *location = parser->previous.end;
17497
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17498
+
17499
+ parser->previous.start = location;
17500
+ parser->previous.type = PM_TOKEN_MISSING;
17254
17501
  }
17255
17502
 
17256
17503
  // If we have a right bracket immediately following a comma,
@@ -17428,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17428
17675
 
17429
17676
  // If we didn't find a terminator and we didn't find a right
17430
17677
  // parenthesis, then this is a syntax error.
17431
- if (!terminator_found) {
17678
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17432
17679
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17433
17680
  }
17434
17681
 
@@ -17457,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17457
17704
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17458
17705
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17459
17706
  break;
17460
- } else {
17707
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
17708
+ // If we're at the end of the file, then we're going to add
17709
+ // an error after this for the ) anyway.
17461
17710
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17462
17711
  }
17463
17712
  }
@@ -17676,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17676
17925
  ) {
17677
17926
  pm_arguments_t arguments = { 0 };
17678
17927
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
17679
-
17680
17928
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17929
+
17930
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17931
+ // If we're about to convert an 'it' implicit local
17932
+ // variable read into a method call, we need to remove
17933
+ // it from the list of implicit local variables.
17934
+ parse_target_implicit_parameter(parser, node);
17935
+ } else {
17936
+ // Otherwise, we're about to convert a regular local
17937
+ // variable read into a method call, in which case we
17938
+ // need to indicate that this was not a read for the
17939
+ // purposes of warnings.
17940
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17941
+
17942
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17943
+ parse_target_implicit_parameter(parser, node);
17944
+ } else {
17945
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
17946
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17947
+ }
17948
+ }
17949
+
17681
17950
  pm_node_destroy(parser, node);
17682
17951
  return (pm_node_t *) fcall;
17683
17952
  }
@@ -17685,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17685
17954
 
17686
17955
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17687
17956
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
17688
- } else {
17689
- // Check if `it` is not going to be assigned.
17690
- switch (parser->current.type) {
17691
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
17692
- case PM_TOKEN_AMPERSAND_EQUAL:
17693
- case PM_TOKEN_CARET_EQUAL:
17694
- case PM_TOKEN_EQUAL:
17695
- case PM_TOKEN_GREATER_GREATER_EQUAL:
17696
- case PM_TOKEN_LESS_LESS_EQUAL:
17697
- case PM_TOKEN_MINUS_EQUAL:
17698
- case PM_TOKEN_PARENTHESIS_RIGHT:
17699
- case PM_TOKEN_PERCENT_EQUAL:
17700
- case PM_TOKEN_PIPE_EQUAL:
17701
- case PM_TOKEN_PIPE_PIPE_EQUAL:
17702
- case PM_TOKEN_PLUS_EQUAL:
17703
- case PM_TOKEN_SLASH_EQUAL:
17704
- case PM_TOKEN_STAR_EQUAL:
17705
- case PM_TOKEN_STAR_STAR_EQUAL:
17706
- break;
17707
- default:
17708
- // Once we know it's neither a method call nor an
17709
- // assignment, we can finally create `it` default
17710
- // parameter.
17711
- node = pm_node_check_it(parser, node);
17712
- }
17713
17957
  }
17714
17958
 
17715
17959
  return node;
@@ -17970,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17970
18214
  // as frozen because when clause strings are frozen.
17971
18215
  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17972
18216
  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18217
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18218
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17973
18219
  }
17974
18220
 
17975
18221
  pm_when_clause_static_literals_add(parser, &literals, condition);
@@ -18375,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18375
18621
 
18376
18622
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18377
18623
  receiver = parse_variable_call(parser);
18378
- receiver = pm_node_check_it(parser, receiver);
18379
18624
 
18380
18625
  pm_parser_scope_push(parser, true);
18381
18626
  lex_state_set(parser, PM_LEX_STATE_FNAME);
@@ -18712,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18712
18957
  if (match1(parser, PM_TOKEN_COMMA)) {
18713
18958
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18714
18959
  } else {
18715
- index = parse_target(parser, index, false);
18960
+ index = parse_target(parser, index, false, false);
18716
18961
  }
18717
18962
 
18718
18963
  context_pop(parser);
@@ -19347,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19347
19592
  bool ascii_only = parser->current_regular_expression_ascii_only;
19348
19593
  parser_lex(parser);
19349
19594
 
19350
- // If we hit an end, then we can create a regular expression node
19351
- // without interpolation, which can be represented more succinctly and
19352
- // more easily compiled.
19595
+ // If we hit an end, then we can create a regular expression
19596
+ // node without interpolation, which can be represented more
19597
+ // succinctly and more easily compiled.
19353
19598
  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19354
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19355
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
19356
- return node;
19599
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19600
+
19601
+ // If we're not immediately followed by a =~, then we want
19602
+ // to parse all of the errors at this point. If it is
19603
+ // followed by a =~, then it will get parsed higher up while
19604
+ // parsing the named captures as well.
19605
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19606
+ parse_regular_expression_errors(parser, node);
19607
+ }
19608
+
19609
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
19610
+ return (pm_node_t *) node;
19357
19611
  }
19358
19612
 
19359
19613
  // If we get here, then we have interpolation so we'll need to create
@@ -19571,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19571
19825
 
19572
19826
  switch (parser->current.type) {
19573
19827
  case PM_TOKEN_PARENTHESIS_LEFT: {
19574
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19575
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19576
-
19577
19828
  pm_token_t opening = parser->current;
19578
19829
  parser_lex(parser);
19579
19830
 
@@ -19590,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19590
19841
  break;
19591
19842
  }
19592
19843
  case PM_CASE_PARAMETER: {
19593
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19594
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19595
-
19596
19844
  pm_accepts_block_stack_push(parser, false);
19597
19845
  pm_token_t opening = not_provided(parser);
19598
19846
  block_parameters = parse_block_parameters(parser, false, &opening, true);
@@ -19845,89 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19845
20093
  }
19846
20094
 
19847
20095
  /**
19848
- * Potentially change a =~ with a regular expression with named captures into a
19849
- * match write node.
20096
+ * This struct is used to pass information between the regular expression parser
20097
+ * and the named capture callback.
19850
20098
  */
19851
- static pm_node_t *
19852
- parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
19853
- pm_string_list_t named_captures = { 0 };
19854
- pm_node_t *result;
20099
+ typedef struct {
20100
+ /** The parser that is parsing the regular expression. */
20101
+ pm_parser_t *parser;
19855
20102
 
19856
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
19857
- // Since we should not create a MatchWriteNode when all capture names
19858
- // are invalid, creating a MatchWriteNode is delaid here.
19859
- pm_match_write_node_t *match = NULL;
19860
- pm_constant_id_list_t names = { 0 };
20103
+ /** The call node wrapping the regular expression node. */
20104
+ pm_call_node_t *call;
19861
20105
 
19862
- for (size_t index = 0; index < named_captures.length; index++) {
19863
- pm_string_t *string = &named_captures.strings[index];
20106
+ /** The match write node that is being created. */
20107
+ pm_match_write_node_t *match;
19864
20108
 
19865
- const uint8_t *source = pm_string_source(string);
19866
- size_t length = pm_string_length(string);
20109
+ /** The list of names that have been parsed. */
20110
+ pm_constant_id_list_t names;
19867
20111
 
19868
- pm_location_t location;
19869
- pm_constant_id_t name;
20112
+ /**
20113
+ * Whether the content of the regular expression is shared. This impacts
20114
+ * whether or not we used owned constants or shared constants in the
20115
+ * constant pool for the names of the captures.
20116
+ */
20117
+ bool shared;
20118
+ } parse_regular_expression_named_capture_data_t;
19870
20119
 
19871
- // If the name of the capture group isn't a valid identifier, we do
19872
- // not add it to the local table.
19873
- if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
20120
+ /**
20121
+ * This callback is called when the regular expression parser encounters a named
20122
+ * capture group.
20123
+ */
20124
+ static void
20125
+ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20126
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
19874
20127
 
19875
- if (content->type == PM_STRING_SHARED) {
19876
- // If the unescaped string is a slice of the source, then we can
19877
- // copy the names directly. The pointers will line up.
19878
- location = (pm_location_t) { .start = source, .end = source + length };
19879
- name = pm_parser_constant_id_location(parser, location.start, location.end);
19880
- } else {
19881
- // Otherwise, the name is a slice of the malloc-ed owned string,
19882
- // in which case we need to copy it out into a new string.
19883
- location = call->receiver->location;
20128
+ pm_parser_t *parser = callback_data->parser;
20129
+ pm_call_node_t *call = callback_data->call;
20130
+ pm_constant_id_list_t *names = &callback_data->names;
19884
20131
 
19885
- void *memory = xmalloc(length);
19886
- if (memory == NULL) abort();
20132
+ const uint8_t *source = pm_string_source(capture);
20133
+ size_t length = pm_string_length(capture);
19887
20134
 
19888
- memcpy(memory, source, length);
19889
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
19890
- }
20135
+ pm_location_t location;
20136
+ pm_constant_id_t name;
19891
20137
 
19892
- if (name != 0) {
19893
- // We dont want to create duplicate targets if the capture name
19894
- // is duplicated.
19895
- if (pm_constant_id_list_includes(&names, name)) continue;
19896
- pm_constant_id_list_append(&names, name);
20138
+ // If the name of the capture group isn't a valid identifier, we do
20139
+ // not add it to the local table.
20140
+ if (!pm_slice_is_valid_local(parser, source, source + length)) return;
19897
20141
 
19898
- int depth;
19899
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
19900
- // If the identifier is not already a local, then we'll add
19901
- // it to the local table unless it's a keyword.
19902
- if (pm_local_is_keyword((const char *) source, length)) continue;
20142
+ if (callback_data->shared) {
20143
+ // If the unescaped string is a slice of the source, then we can
20144
+ // copy the names directly. The pointers will line up.
20145
+ location = (pm_location_t) { .start = source, .end = source + length };
20146
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
20147
+ } else {
20148
+ // Otherwise, the name is a slice of the malloc-ed owned string,
20149
+ // in which case we need to copy it out into a new string.
20150
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
19903
20151
 
19904
- pm_parser_local_add(parser, name, location.start, location.end, 0);
19905
- }
20152
+ void *memory = xmalloc(length);
20153
+ if (memory == NULL) abort();
19906
20154
 
19907
- // Here we lazily create the MatchWriteNode since we know we're
19908
- // about to add a target.
19909
- if (match == NULL) match = pm_match_write_node_create(parser, call);
20155
+ memcpy(memory, source, length);
20156
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20157
+ }
19910
20158
 
19911
- // Next, create the local variable target and add it to the
19912
- // list of targets for the match.
19913
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
19914
- pm_node_list_append(&match->targets, target);
19915
- }
20159
+ // Add this name to the list of constants if it is valid, not duplicated,
20160
+ // and not a keyword.
20161
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20162
+ pm_constant_id_list_append(names, name);
20163
+
20164
+ int depth;
20165
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20166
+ // If the local is not already a local but it is a keyword, then we
20167
+ // do not want to add a capture for this.
20168
+ if (pm_local_is_keyword((const char *) source, length)) return;
20169
+
20170
+ // If the identifier is not already a local, then we will add it to
20171
+ // the local table.
20172
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
19916
20173
  }
19917
20174
 
19918
- if (match != NULL) {
19919
- result = (pm_node_t *) match;
19920
- } else {
19921
- result = (pm_node_t *) call;
20175
+ // Here we lazily create the MatchWriteNode since we know we're
20176
+ // about to add a target.
20177
+ if (callback_data->match == NULL) {
20178
+ callback_data->match = pm_match_write_node_create(parser, call);
19922
20179
  }
19923
20180
 
19924
- pm_constant_id_list_free(&names);
19925
- } else {
19926
- result = (pm_node_t *) call;
20181
+ // Next, create the local variable target and add it to the list of
20182
+ // targets for the match.
20183
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20184
+ pm_node_list_append(&callback_data->match->targets, target);
19927
20185
  }
20186
+ }
19928
20187
 
19929
- pm_string_list_free(&named_captures);
19930
- return result;
20188
+ /**
20189
+ * Potentially change a =~ with a regular expression with named captures into a
20190
+ * match write node.
20191
+ */
20192
+ static pm_node_t *
20193
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
20194
+ parse_regular_expression_named_capture_data_t callback_data = {
20195
+ .parser = parser,
20196
+ .call = call,
20197
+ .names = { 0 },
20198
+ .shared = content->type == PM_STRING_SHARED
20199
+ };
20200
+
20201
+ parse_regular_expression_error_data_t error_data = {
20202
+ .parser = parser,
20203
+ .start = call->receiver->location.start,
20204
+ .end = call->receiver->location.end,
20205
+ .shared = content->type == PM_STRING_SHARED
20206
+ };
20207
+
20208
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20209
+ pm_constant_id_list_free(&callback_data.names);
20210
+
20211
+ if (callback_data.match != NULL) {
20212
+ return (pm_node_t *) callback_data.match;
20213
+ } else {
20214
+ return (pm_node_t *) call;
20215
+ }
19931
20216
  }
19932
20217
 
19933
20218
  static inline pm_node_t *
@@ -20044,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20044
20329
  return result;
20045
20330
  }
20046
20331
  case PM_CALL_NODE: {
20047
- parser_lex(parser);
20048
20332
  pm_call_node_t *cast = (pm_call_node_t *) node;
20049
20333
 
20050
20334
  // If we have a vcall (a method with no arguments and no
@@ -20055,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20055
20339
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20056
20340
 
20057
20341
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20342
+ parser_lex(parser);
20343
+
20058
20344
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20059
20345
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20060
20346
 
@@ -20062,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20062
20348
  return result;
20063
20349
  }
20064
20350
 
20351
+ // Move past the token here so that we have already added
20352
+ // the local variable by this point.
20353
+ parser_lex(parser);
20354
+
20065
20355
  // If there is no call operator and the message is "[]" then
20066
20356
  // this is an aref expression, and we can transform it into
20067
20357
  // an aset expression.
@@ -20157,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20157
20447
  return result;
20158
20448
  }
20159
20449
  case PM_CALL_NODE: {
20160
- parser_lex(parser);
20161
20450
  pm_call_node_t *cast = (pm_call_node_t *) node;
20162
20451
 
20163
20452
  // If we have a vcall (a method with no arguments and no
@@ -20168,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20168
20457
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20169
20458
 
20170
20459
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20460
+ parser_lex(parser);
20461
+
20171
20462
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20172
20463
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20173
20464
 
@@ -20175,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20175
20466
  return result;
20176
20467
  }
20177
20468
 
20469
+ // Move past the token here so that we have already added
20470
+ // the local variable by this point.
20471
+ parser_lex(parser);
20472
+
20178
20473
  // If there is no call operator and the message is "[]" then
20179
20474
  // this is an aref expression, and we can transform it into
20180
20475
  // an aset expression.
@@ -20584,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20584
20879
 
20585
20880
  if (
20586
20881
  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
20587
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
20882
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
20588
20883
  ) {
20589
20884
  // If we have a constant immediately following a '::' operator, then
20590
20885
  // this can either be a constant path or a method call, depending on
@@ -21127,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
21127
21422
 
21128
21423
  // Scopes given from the outside are not allowed to have numbered
21129
21424
  // parameters.
21130
- parser->current_scope->numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED;
21425
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
21131
21426
 
21132
21427
  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
21133
21428
  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -21515,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
21515
21810
  }
21516
21811
 
21517
21812
  #endif
21518
-
21519
- /** An error that is going to be formatted into the output. */
21520
- typedef struct {
21521
- /** A pointer to the diagnostic that was generated during parsing. */
21522
- pm_diagnostic_t *error;
21523
-
21524
- /** The start line of the diagnostic message. */
21525
- int32_t line;
21526
-
21527
- /** The column start of the diagnostic message. */
21528
- uint32_t column_start;
21529
-
21530
- /** The column end of the diagnostic message. */
21531
- uint32_t column_end;
21532
- } pm_error_t;
21533
-
21534
- /** The format that will be used to format the errors into the output. */
21535
- typedef struct {
21536
- /** The prefix that will be used for line numbers. */
21537
- const char *number_prefix;
21538
-
21539
- /** The prefix that will be used for blank lines. */
21540
- const char *blank_prefix;
21541
-
21542
- /** The divider that will be used between sections of source code. */
21543
- const char *divider;
21544
-
21545
- /** The length of the blank prefix. */
21546
- size_t blank_prefix_length;
21547
-
21548
- /** The length of the divider. */
21549
- size_t divider_length;
21550
- } pm_error_format_t;
21551
-
21552
- #define PM_COLOR_GRAY "\033[38;5;102m"
21553
- #define PM_COLOR_RED "\033[1;31m"
21554
- #define PM_COLOR_RESET "\033[m"
21555
-
21556
- static inline pm_error_t *
21557
- pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
21558
- pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
21559
- if (errors == NULL) return NULL;
21560
-
21561
- int32_t start_line = parser->start_line;
21562
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
21563
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
21564
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
21565
-
21566
- // We're going to insert this error into the array in sorted order. We
21567
- // do this by finding the first error that has a line number greater
21568
- // than the current error and then inserting the current error before
21569
- // that one.
21570
- size_t index = 0;
21571
- while (
21572
- (index < error_list->size) &&
21573
- (errors[index].error != NULL) &&
21574
- (
21575
- (errors[index].line < start.line) ||
21576
- ((errors[index].line == start.line) && (errors[index].column_start < start.column))
21577
- )
21578
- ) index++;
21579
-
21580
- // Now we're going to shift all of the errors after this one down one
21581
- // index to make room for the new error.
21582
- if (index + 1 < error_list->size) {
21583
- memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
21584
- }
21585
-
21586
- // Finally, we'll insert the error into the array.
21587
- uint32_t column_end;
21588
- if (start.line == end.line) {
21589
- column_end = end.column;
21590
- } else {
21591
- column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
21592
- }
21593
-
21594
- // Ensure we have at least one column of error.
21595
- if (start.column == column_end) column_end++;
21596
-
21597
- errors[index] = (pm_error_t) {
21598
- .error = error,
21599
- .line = start.line,
21600
- .column_start = start.column,
21601
- .column_end = column_end
21602
- };
21603
- }
21604
-
21605
- return errors;
21606
- }
21607
-
21608
- static inline void
21609
- pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
21610
- int32_t line_delta = line - parser->start_line;
21611
- assert(line_delta >= 0);
21612
-
21613
- size_t index = (size_t) line_delta;
21614
- assert(index < newline_list->size);
21615
-
21616
- const uint8_t *start = &parser->start[newline_list->offsets[index]];
21617
- const uint8_t *end;
21618
-
21619
- if (index >= newline_list->size - 1) {
21620
- end = parser->end;
21621
- } else {
21622
- end = &parser->start[newline_list->offsets[index + 1]];
21623
- }
21624
-
21625
- pm_buffer_append_format(buffer, number_prefix, line);
21626
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
21627
-
21628
- if (end == parser->end && end[-1] != '\n') {
21629
- pm_buffer_append_string(buffer, "\n", 1);
21630
- }
21631
- }
21632
-
21633
- /**
21634
- * Format the errors on the parser into the given buffer.
21635
- */
21636
- PRISM_EXPORTED_FUNCTION void
21637
- pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
21638
- assert(error_list->size != 0);
21639
-
21640
- // First, we're going to sort all of the errors by line number using an
21641
- // insertion sort into a newly allocated array.
21642
- const int32_t start_line = parser->start_line;
21643
- const pm_newline_list_t *newline_list = &parser->newline_list;
21644
-
21645
- pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
21646
- if (errors == NULL) return;
21647
-
21648
- // Now we're going to determine how we're going to format line numbers and
21649
- // blank lines based on the maximum number of digits in the line numbers
21650
- // that are going to be displaid.
21651
- pm_error_format_t error_format;
21652
- int32_t first_line_number = errors[0].line;
21653
- int32_t last_line_number = errors[error_list->size - 1].line;
21654
-
21655
- // If we have a maximum line number that is negative, then we're going to
21656
- // use the absolute value for comparison but multiple by 10 to additionally
21657
- // have a column for the negative sign.
21658
- if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
21659
- if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
21660
- int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
21661
-
21662
- if (max_line_number < 10) {
21663
- if (colorize) {
21664
- error_format = (pm_error_format_t) {
21665
- .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
21666
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21667
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
21668
- };
21669
- } else {
21670
- error_format = (pm_error_format_t) {
21671
- .number_prefix = "%1" PRIi32 " | ",
21672
- .blank_prefix = " | ",
21673
- .divider = " ~~~~~\n"
21674
- };
21675
- }
21676
- } else if (max_line_number < 100) {
21677
- if (colorize) {
21678
- error_format = (pm_error_format_t) {
21679
- .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
21680
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21681
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
21682
- };
21683
- } else {
21684
- error_format = (pm_error_format_t) {
21685
- .number_prefix = "%2" PRIi32 " | ",
21686
- .blank_prefix = " | ",
21687
- .divider = " ~~~~~~\n"
21688
- };
21689
- }
21690
- } else if (max_line_number < 1000) {
21691
- if (colorize) {
21692
- error_format = (pm_error_format_t) {
21693
- .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
21694
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21695
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
21696
- };
21697
- } else {
21698
- error_format = (pm_error_format_t) {
21699
- .number_prefix = "%3" PRIi32 " | ",
21700
- .blank_prefix = " | ",
21701
- .divider = " ~~~~~~~\n"
21702
- };
21703
- }
21704
- } else if (max_line_number < 10000) {
21705
- if (colorize) {
21706
- error_format = (pm_error_format_t) {
21707
- .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
21708
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21709
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21710
- };
21711
- } else {
21712
- error_format = (pm_error_format_t) {
21713
- .number_prefix = "%4" PRIi32 " | ",
21714
- .blank_prefix = " | ",
21715
- .divider = " ~~~~~~~~\n"
21716
- };
21717
- }
21718
- } else {
21719
- if (colorize) {
21720
- error_format = (pm_error_format_t) {
21721
- .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
21722
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21723
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21724
- };
21725
- } else {
21726
- error_format = (pm_error_format_t) {
21727
- .number_prefix = "%5" PRIi32 " | ",
21728
- .blank_prefix = " | ",
21729
- .divider = " ~~~~~~~~\n"
21730
- };
21731
- }
21732
- }
21733
-
21734
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
21735
- error_format.divider_length = strlen(error_format.divider);
21736
-
21737
- // Now we're going to iterate through every error in our error list and
21738
- // display it. While we're iterating, we will display some padding lines of
21739
- // the source before the error to give some context. We'll be careful not to
21740
- // display the same line twice in case the errors are close enough in the
21741
- // source.
21742
- int32_t last_line = parser->start_line - 1;
21743
- const pm_encoding_t *encoding = parser->encoding;
21744
-
21745
- for (size_t index = 0; index < error_list->size; index++) {
21746
- pm_error_t *error = &errors[index];
21747
-
21748
- // Here we determine how many lines of padding of the source to display,
21749
- // based on the difference from the last line that was displayed.
21750
- if (error->line - last_line > 1) {
21751
- if (error->line - last_line > 2) {
21752
- if ((index != 0) && (error->line - last_line > 3)) {
21753
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
21754
- }
21755
-
21756
- pm_buffer_append_string(buffer, " ", 2);
21757
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
21758
- }
21759
-
21760
- pm_buffer_append_string(buffer, " ", 2);
21761
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
21762
- }
21763
-
21764
- // If this is the first error or we're on a new line, then we'll display
21765
- // the line that has the error in it.
21766
- if ((index == 0) || (error->line != last_line)) {
21767
- if (colorize) {
21768
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
21769
- } else {
21770
- pm_buffer_append_string(buffer, "> ", 2);
21771
- }
21772
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
21773
- }
21774
-
21775
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
21776
- if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
21777
-
21778
- // Now we'll display the actual error message. We'll do this by first
21779
- // putting the prefix to the line, then a bunch of blank spaces
21780
- // depending on the column, then as many carets as we need to display
21781
- // the width of the error, then the error message itself.
21782
- //
21783
- // Note that this doesn't take into account the width of the actual
21784
- // character when displayed in the terminal. For some East Asian
21785
- // languages or emoji, this means it can be thrown off pretty badly. We
21786
- // will need to solve this eventually.
21787
- pm_buffer_append_string(buffer, " ", 2);
21788
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21789
-
21790
- size_t column = 0;
21791
- while (column < error->column_start) {
21792
- pm_buffer_append_byte(buffer, ' ');
21793
-
21794
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21795
- column += (char_width == 0 ? 1 : char_width);
21796
- }
21797
-
21798
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21799
- pm_buffer_append_byte(buffer, '^');
21800
-
21801
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21802
- column += (char_width == 0 ? 1 : char_width);
21803
-
21804
- while (column < error->column_end) {
21805
- pm_buffer_append_byte(buffer, '~');
21806
-
21807
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21808
- column += (char_width == 0 ? 1 : char_width);
21809
- }
21810
-
21811
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21812
-
21813
- if (inline_messages) {
21814
- pm_buffer_append_byte(buffer, ' ');
21815
- assert(error->error != NULL);
21816
-
21817
- const char *message = error->error->message;
21818
- pm_buffer_append_string(buffer, message, strlen(message));
21819
- }
21820
-
21821
- pm_buffer_append_byte(buffer, '\n');
21822
-
21823
- // Here we determine how many lines of padding to display after the
21824
- // error, depending on where the next error is in source.
21825
- last_line = error->line;
21826
- int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
21827
-
21828
- if (next_line - last_line > 1) {
21829
- pm_buffer_append_string(buffer, " ", 2);
21830
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21831
- }
21832
-
21833
- if (next_line - last_line > 1) {
21834
- pm_buffer_append_string(buffer, " ", 2);
21835
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21836
- }
21837
- }
21838
-
21839
- // Finally, we'll free the array of errors that we allocated.
21840
- xfree(errors);
21841
- }
21842
-
21843
- #undef PM_COLOR_GRAY
21844
- #undef PM_COLOR_RED
21845
- #undef PM_COLOR_RESET