prism 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +66 -9
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +30 -12
  9. data/ext/prism/extension.c +107 -372
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +138 -70
  12. data/include/prism/diagnostic.h +7 -2
  13. data/include/prism/node.h +0 -21
  14. data/include/prism/parser.h +23 -25
  15. data/include/prism/regexp.h +17 -8
  16. data/include/prism/static_literals.h +3 -2
  17. data/include/prism/util/pm_char.h +1 -2
  18. data/include/prism/util/pm_constant_pool.h +0 -8
  19. data/include/prism/util/pm_integer.h +16 -9
  20. data/include/prism/util/pm_string.h +0 -8
  21. data/include/prism/version.h +2 -2
  22. data/include/prism.h +0 -11
  23. data/lib/prism/compiler.rb +3 -0
  24. data/lib/prism/dispatcher.rb +14 -0
  25. data/lib/prism/dot_visitor.rb +22 -3
  26. data/lib/prism/dsl.rb +7 -2
  27. data/lib/prism/ffi.rb +24 -3
  28. data/lib/prism/inspect_visitor.rb +10 -8
  29. data/lib/prism/mutation_compiler.rb +6 -1
  30. data/lib/prism/node.rb +166 -241
  31. data/lib/prism/node_ext.rb +21 -5
  32. data/lib/prism/parse_result/comments.rb +0 -7
  33. data/lib/prism/parse_result/newlines.rb +101 -11
  34. data/lib/prism/parse_result.rb +17 -0
  35. data/lib/prism/reflection.rb +3 -1
  36. data/lib/prism/serialize.rb +80 -67
  37. data/lib/prism/translation/parser/compiler.rb +134 -114
  38. data/lib/prism/translation/parser.rb +6 -1
  39. data/lib/prism/translation/ripper.rb +8 -6
  40. data/lib/prism/translation/ruby_parser.rb +23 -5
  41. data/lib/prism/visitor.rb +3 -0
  42. data/lib/prism.rb +0 -4
  43. data/prism.gemspec +1 -4
  44. data/rbi/prism/node.rbi +63 -6
  45. data/rbi/prism/visitor.rbi +3 -0
  46. data/rbi/prism.rbi +6 -0
  47. data/sig/prism/dsl.rbs +4 -1
  48. data/sig/prism/mutation_compiler.rbs +1 -0
  49. data/sig/prism/node.rbs +28 -4
  50. data/sig/prism/visitor.rbs +1 -0
  51. data/sig/prism.rbs +21 -0
  52. data/src/diagnostic.c +27 -17
  53. data/src/node.c +408 -1666
  54. data/src/prettyprint.c +49 -6
  55. data/src/prism.c +958 -991
  56. data/src/regexp.c +133 -68
  57. data/src/serialize.c +6 -1
  58. data/src/static_literals.c +63 -84
  59. data/src/token_type.c +2 -2
  60. data/src/util/pm_constant_pool.c +0 -8
  61. data/src/util/pm_integer.c +39 -11
  62. data/src/util/pm_string.c +0 -12
  63. data/src/util/pm_strpbrk.c +32 -6
  64. metadata +2 -5
  65. data/include/prism/util/pm_string_list.h +0 -44
  66. data/lib/prism/debug.rb +0 -249
  67. data/src/util/pm_string_list.c +0 -28
data/src/prism.c CHANGED
@@ -423,7 +423,7 @@ lex_mode_pop(pm_parser_t *parser) {
423
423
  * This is the equivalent of IS_lex_state is CRuby.
424
424
  */
425
425
  static inline bool
426
- lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
426
+ lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
427
427
  return parser->lex_state & state;
428
428
  }
429
429
 
@@ -708,7 +708,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
708
708
  .previous = parser->current_scope,
709
709
  .locals = { 0 },
710
710
  .parameters = PM_SCOPE_PARAMETERS_NONE,
711
- .numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_NONE,
711
+ .implicit_parameters = { 0 },
712
712
  .shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
713
713
  .closed = closed
714
714
  };
@@ -1183,6 +1183,31 @@ pm_check_value_expression(pm_node_t *node) {
1183
1183
  return NULL;
1184
1184
  case PM_BEGIN_NODE: {
1185
1185
  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
+
1187
+ if (cast->statements == NULL && cast->ensure_clause != NULL) {
1188
+ node = (pm_node_t *) cast->ensure_clause;
1189
+ }
1190
+ else {
1191
+ if (cast->rescue_clause != NULL) {
1192
+ if (cast->rescue_clause->statements == NULL) {
1193
+ return NULL;
1194
+ }
1195
+ else if (cast->else_clause != NULL) {
1196
+ node = (pm_node_t *) cast->else_clause;
1197
+ }
1198
+ else {
1199
+ node = (pm_node_t *) cast->statements;
1200
+ }
1201
+ }
1202
+ else {
1203
+ node = (pm_node_t *) cast->statements;
1204
+ }
1205
+ }
1206
+
1207
+ break;
1208
+ }
1209
+ case PM_ENSURE_NODE: {
1210
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1186
1211
  node = (pm_node_t *) cast->statements;
1187
1212
  break;
1188
1213
  }
@@ -1630,7 +1655,7 @@ not_provided(pm_parser_t *parser) {
1630
1655
  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1631
1656
  }
1632
1657
 
1633
- #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
1658
+ #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
1634
1659
  #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
1635
1660
  #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
1636
1661
  #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
@@ -2827,8 +2852,7 @@ static pm_call_node_t *
2827
2852
  pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2828
2853
  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2829
2854
 
2830
- node->base.location.start = parser->start;
2831
- node->base.location.end = parser->start;
2855
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
2832
2856
  node->arguments = arguments;
2833
2857
 
2834
2858
  node->name = name;
@@ -4291,7 +4315,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4291
4315
  }
4292
4316
 
4293
4317
  /**
4294
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
4318
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
4295
4319
  */
4296
4320
  static pm_rational_node_t *
4297
4321
  pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4301,16 +4325,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4301
4325
  *node = (pm_rational_node_t) {
4302
4326
  {
4303
4327
  .type = PM_RATIONAL_NODE,
4304
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
4328
+ .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4305
4329
  .location = PM_LOCATION_TOKEN_VALUE(token)
4306
4330
  },
4307
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4308
- .type = PM_TOKEN_FLOAT,
4309
- .start = token->start,
4310
- .end = token->end - 1
4311
- }))
4331
+ .numerator = { 0 },
4332
+ .denominator = { 0 }
4312
4333
  };
4313
4334
 
4335
+ const uint8_t *start = token->start;
4336
+ const uint8_t *end = token->end - 1; // r
4337
+
4338
+ while (start < end && *start == '0') start++; // 0.1 -> .1
4339
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4340
+
4341
+ size_t length = (size_t) (end - start);
4342
+ if (length == 1) {
4343
+ node->denominator.value = 1;
4344
+ return node;
4345
+ }
4346
+
4347
+ const uint8_t *point = memchr(start, '.', length);
4348
+ assert(point && "should have a decimal point");
4349
+
4350
+ uint8_t *digits = malloc(length);
4351
+ if (digits == NULL) {
4352
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4353
+ abort();
4354
+ }
4355
+
4356
+ memcpy(digits, start, (unsigned long) (point - start));
4357
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4358
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4359
+
4360
+ digits[0] = '1';
4361
+ if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4362
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4363
+ free(digits);
4364
+
4365
+ pm_integers_reduce(&node->numerator, &node->denominator);
4314
4366
  return node;
4315
4367
  }
4316
4368
 
@@ -4621,7 +4673,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
4621
4673
  *node = (pm_global_variable_read_node_t) {
4622
4674
  {
4623
4675
  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4624
- .location = { .start = parser->start, .end = parser->start }
4676
+ .location = PM_LOCATION_NULL_VALUE(parser)
4625
4677
  },
4626
4678
  .name = name
4627
4679
  };
@@ -4663,11 +4715,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
4663
4715
  *node = (pm_global_variable_write_node_t) {
4664
4716
  {
4665
4717
  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4666
- .location = { .start = parser->start, .end = parser->start }
4718
+ .location = PM_LOCATION_NULL_VALUE(parser)
4667
4719
  },
4668
4720
  .name = name,
4669
- .name_loc = { .start = parser->start, .end = parser->start },
4670
- .operator_loc = { .start = parser->start, .end = parser->start },
4721
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
4722
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4671
4723
  .value = value
4672
4724
  };
4673
4725
 
@@ -4944,7 +4996,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
4944
4996
  }
4945
4997
 
4946
4998
  /**
4947
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
4999
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
4948
5000
  * token.
4949
5001
  */
4950
5002
  static pm_rational_node_t *
@@ -4955,16 +5007,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
4955
5007
  *node = (pm_rational_node_t) {
4956
5008
  {
4957
5009
  .type = PM_RATIONAL_NODE,
4958
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5010
+ .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4959
5011
  .location = PM_LOCATION_TOKEN_VALUE(token)
4960
5012
  },
4961
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4962
- .type = PM_TOKEN_INTEGER,
4963
- .start = token->start,
4964
- .end = token->end - 1
4965
- }))
5013
+ .numerator = { 0 },
5014
+ .denominator = { .value = 1, 0 }
4966
5015
  };
4967
5016
 
5017
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
5018
+ switch (base) {
5019
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
5020
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
5021
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
5022
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
5023
+ default: assert(false && "unreachable"); break;
5024
+ }
5025
+
5026
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
5027
+
4968
5028
  return node;
4969
5029
  }
4970
5030
 
@@ -5462,6 +5522,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
5462
5522
  node->base.location.end = closing->end;
5463
5523
  }
5464
5524
 
5525
+ /**
5526
+ * Create a local variable read that is reading the implicit 'it' variable.
5527
+ */
5528
+ static pm_it_local_variable_read_node_t *
5529
+ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5530
+ pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
5531
+
5532
+ *node = (pm_it_local_variable_read_node_t) {
5533
+ {
5534
+ .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5535
+ .location = PM_LOCATION_TOKEN_VALUE(name)
5536
+ }
5537
+ };
5538
+
5539
+ return node;
5540
+ }
5541
+
5465
5542
  /**
5466
5543
  * Allocate and initialize a new ItParametersNode node.
5467
5544
  */
@@ -5774,28 +5851,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5774
5851
  return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5775
5852
  }
5776
5853
 
5777
- /**
5778
- * Returns true if the given node is `it` default parameter.
5779
- */
5780
- static inline bool
5781
- pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
5782
- // Check if it's a local variable reference
5783
- if (node->type != PM_CALL_NODE) {
5784
- return false;
5785
- }
5786
-
5787
- // Check if it's a variable call
5788
- pm_call_node_t *call_node = (pm_call_node_t *) node;
5789
- if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
5790
- return false;
5791
- }
5792
-
5793
- // Check if it's called `it`
5794
- pm_constant_id_t id = ((pm_call_node_t *)node)->name;
5795
- pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
5796
- return pm_token_is_it(constant->start, constant->start + constant->length);
5797
- }
5798
-
5799
5854
  /**
5800
5855
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
5801
5856
  * are of the form /^_\d$/).
@@ -7355,9 +7410,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7355
7410
  {
7356
7411
  .type = PM_SYMBOL_NODE,
7357
7412
  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7358
- .location = { .start = parser->start, .end = parser->start }
7413
+ .location = PM_LOCATION_NULL_VALUE(parser)
7359
7414
  },
7360
- .value_loc = { .start = parser->start, .end = parser->start },
7415
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
7361
7416
  .unescaped = { 0 }
7362
7417
  };
7363
7418
 
@@ -7758,10 +7813,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7758
7813
  *node = (pm_while_node_t) {
7759
7814
  {
7760
7815
  .type = PM_WHILE_NODE,
7761
- .location = { .start = parser->start, .end = parser->start }
7816
+ .location = PM_LOCATION_NULL_VALUE(parser)
7762
7817
  },
7763
- .keyword_loc = { .start = parser->start, .end = parser->start },
7764
- .closing_loc = { .start = parser->start, .end = parser->start },
7818
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7819
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7765
7820
  .predicate = predicate,
7766
7821
  .statements = statements
7767
7822
  };
@@ -7916,51 +7971,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
7916
7971
  return constant_id;
7917
7972
  }
7918
7973
 
7919
- /**
7920
- * Create a local variable read that is reading the implicit 'it' variable.
7921
- */
7922
- static pm_local_variable_read_node_t *
7923
- pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
7924
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
7925
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
7926
- return NULL;
7927
- }
7928
-
7929
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
7930
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
7931
- return NULL;
7932
- }
7933
-
7934
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
7935
-
7936
- pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
7937
- pm_parser_local_add(parser, name_id, name->start, name->end, 0);
7938
-
7939
- return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
7940
- }
7941
-
7942
- /**
7943
- * Convert a `it` variable call node to a node for `it` default parameter.
7944
- */
7945
- static pm_node_t *
7946
- pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7947
- if (
7948
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7949
- !parser->current_scope->closed &&
7950
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7951
- pm_node_is_it(parser, node)
7952
- ) {
7953
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
7954
-
7955
- if (read != NULL) {
7956
- pm_node_destroy(parser, node);
7957
- node = (pm_node_t *) read;
7958
- }
7959
- }
7960
-
7961
- return node;
7962
- }
7963
-
7964
7974
  /**
7965
7975
  * Add a parameter name to the current scope and check whether the name of the
7966
7976
  * parameter is unique or not.
@@ -7996,6 +8006,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
7996
8006
  pm_scope_t *scope = parser->current_scope;
7997
8007
  parser->current_scope = scope->previous;
7998
8008
  pm_locals_free(&scope->locals);
8009
+ pm_node_list_free(&scope->implicit_parameters);
7999
8010
  xfree(scope);
8000
8011
  }
8001
8012
 
@@ -8067,7 +8078,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
8067
8078
  * is beyond the end of the source then return '\0'.
8068
8079
  */
8069
8080
  static inline uint8_t
8070
- peek_at(pm_parser_t *parser, const uint8_t *cursor) {
8081
+ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8071
8082
  if (cursor < parser->end) {
8072
8083
  return *cursor;
8073
8084
  } else {
@@ -8090,7 +8101,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8090
8101
  * that position is beyond the end of the source then return '\0'.
8091
8102
  */
8092
8103
  static inline uint8_t
8093
- peek(pm_parser_t *parser) {
8104
+ peek(const pm_parser_t *parser) {
8094
8105
  return peek_at(parser, parser->current.end);
8095
8106
  }
8096
8107
 
@@ -8155,6 +8166,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
8155
8166
  return memchr(cursor, '\n', (size_t) length);
8156
8167
  }
8157
8168
 
8169
+ /**
8170
+ * This is equivalent to the predicate of warn_balanced in CRuby.
8171
+ */
8172
+ static inline bool
8173
+ ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8174
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8175
+ }
8176
+
8158
8177
  /**
8159
8178
  * Here we're going to check if this is a "magic" comment, and perform whatever
8160
8179
  * actions are necessary for it here.
@@ -8995,8 +9014,8 @@ lex_global_variable(pm_parser_t *parser) {
8995
9014
  // If we get here, then we have a $ followed by something that
8996
9015
  // isn't recognized as a global variable.
8997
9016
  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8998
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8999
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9017
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9018
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9000
9019
  }
9001
9020
 
9002
9021
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9389,7 +9408,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9389
9408
  */
9390
9409
  static inline uint8_t
9391
9410
  escape_byte(uint8_t value, const uint8_t flags) {
9392
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
9411
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9393
9412
  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9394
9413
  return value;
9395
9414
  }
@@ -9489,22 +9508,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9489
9508
  static inline void
9490
9509
  escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9491
9510
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9492
- pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
9493
-
9494
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
9495
- uint8_t byte2 = (uint8_t) (byte & 0xF);
9496
-
9497
- if (byte1 >= 0xA) {
9498
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
9499
- } else {
9500
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
9501
- }
9502
-
9503
- if (byte2 >= 0xA) {
9504
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
9505
- } else {
9506
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
9507
- }
9511
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9508
9512
  }
9509
9513
 
9510
9514
  escape_write_byte_encoded(parser, buffer, byte);
@@ -9539,57 +9543,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9539
9543
  switch (peek(parser)) {
9540
9544
  case '\\': {
9541
9545
  parser->current.end++;
9542
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
9546
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9543
9547
  return;
9544
9548
  }
9545
9549
  case '\'': {
9546
9550
  parser->current.end++;
9547
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
9551
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9548
9552
  return;
9549
9553
  }
9550
9554
  case 'a': {
9551
9555
  parser->current.end++;
9552
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
9556
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9553
9557
  return;
9554
9558
  }
9555
9559
  case 'b': {
9556
9560
  parser->current.end++;
9557
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
9561
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9558
9562
  return;
9559
9563
  }
9560
9564
  case 'e': {
9561
9565
  parser->current.end++;
9562
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
9566
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9563
9567
  return;
9564
9568
  }
9565
9569
  case 'f': {
9566
9570
  parser->current.end++;
9567
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
9571
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9568
9572
  return;
9569
9573
  }
9570
9574
  case 'n': {
9571
9575
  parser->current.end++;
9572
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9576
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9573
9577
  return;
9574
9578
  }
9575
9579
  case 'r': {
9576
9580
  parser->current.end++;
9577
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
9581
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9578
9582
  return;
9579
9583
  }
9580
9584
  case 's': {
9581
9585
  parser->current.end++;
9582
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
9586
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9583
9587
  return;
9584
9588
  }
9585
9589
  case 't': {
9586
9590
  parser->current.end++;
9587
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
9591
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9588
9592
  return;
9589
9593
  }
9590
9594
  case 'v': {
9591
9595
  parser->current.end++;
9592
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
9596
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9593
9597
  return;
9594
9598
  }
9595
9599
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -9606,7 +9610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9606
9610
  }
9607
9611
  }
9608
9612
 
9609
- escape_write_byte_encoded(parser, buffer, value);
9613
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9610
9614
  return;
9611
9615
  }
9612
9616
  case 'x': {
@@ -9625,11 +9629,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9625
9629
  parser->current.end++;
9626
9630
  }
9627
9631
 
9632
+ value = escape_byte(value, flags);
9628
9633
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9629
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9634
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9635
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9636
+ } else {
9637
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9638
+ }
9630
9639
  }
9631
9640
 
9632
- escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9641
+ escape_write_byte_encoded(parser, buffer, value);
9633
9642
  } else {
9634
9643
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9635
9644
  }
@@ -9658,7 +9667,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9658
9667
  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9659
9668
  } else if (hexadecimal_length == 0) {
9660
9669
  // there are not hexadecimal characters
9661
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
9670
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9671
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9662
9672
  return;
9663
9673
  }
9664
9674
 
@@ -9707,10 +9717,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9707
9717
  }
9708
9718
  }
9709
9719
 
9710
- if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
- pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9712
- }
9713
-
9714
9720
  return;
9715
9721
  }
9716
9722
  case 'c': {
@@ -9733,6 +9739,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9733
9739
  return;
9734
9740
  }
9735
9741
  parser->current.end++;
9742
+
9743
+ if (match(parser, 'u') || match(parser, 'U')) {
9744
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9745
+ return;
9746
+ }
9747
+
9736
9748
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9737
9749
  return;
9738
9750
  case ' ':
@@ -9760,7 +9772,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9760
9772
  case 'C': {
9761
9773
  parser->current.end++;
9762
9774
  if (peek(parser) != '-') {
9763
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9775
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9776
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9764
9777
  return;
9765
9778
  }
9766
9779
 
@@ -9783,6 +9796,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9783
9796
  return;
9784
9797
  }
9785
9798
  parser->current.end++;
9799
+
9800
+ if (match(parser, 'u') || match(parser, 'U')) {
9801
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9802
+ return;
9803
+ }
9804
+
9786
9805
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9787
9806
  return;
9788
9807
  case ' ':
@@ -9797,7 +9816,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9797
9816
  return;
9798
9817
  default: {
9799
9818
  if (!char_is_ascii_printable(peeked)) {
9800
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9819
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9820
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9801
9821
  return;
9802
9822
  }
9803
9823
 
@@ -9810,7 +9830,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9810
9830
  case 'M': {
9811
9831
  parser->current.end++;
9812
9832
  if (peek(parser) != '-') {
9813
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9833
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9834
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9814
9835
  return;
9815
9836
  }
9816
9837
 
@@ -9828,6 +9849,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9828
9849
  return;
9829
9850
  }
9830
9851
  parser->current.end++;
9852
+
9853
+ if (match(parser, 'u') || match(parser, 'U')) {
9854
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9855
+ return;
9856
+ }
9857
+
9831
9858
  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9832
9859
  return;
9833
9860
  case ' ':
@@ -9842,7 +9869,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9842
9869
  return;
9843
9870
  default:
9844
9871
  if (!char_is_ascii_printable(peeked)) {
9845
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9872
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9873
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9846
9874
  return;
9847
9875
  }
9848
9876
 
@@ -10803,6 +10831,8 @@ parser_lex(pm_parser_t *parser) {
10803
10831
  type = PM_TOKEN_USTAR_STAR;
10804
10832
  } else if (lex_state_beg_p(parser)) {
10805
10833
  type = PM_TOKEN_USTAR_STAR;
10834
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10835
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10806
10836
  }
10807
10837
 
10808
10838
  if (lex_state_operator_p(parser)) {
@@ -10826,6 +10856,8 @@ parser_lex(pm_parser_t *parser) {
10826
10856
  type = PM_TOKEN_USTAR;
10827
10857
  } else if (lex_state_beg_p(parser)) {
10828
10858
  type = PM_TOKEN_USTAR;
10859
+ } else if (ambiguous_operator_p(parser, space_seen)) {
10860
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10829
10861
  }
10830
10862
 
10831
10863
  if (lex_state_operator_p(parser)) {
@@ -10942,6 +10974,7 @@ parser_lex(pm_parser_t *parser) {
10942
10974
  // If we have quotes, then we're going to go until we find the
10943
10975
  // end quote.
10944
10976
  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10977
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10945
10978
  parser->current.end++;
10946
10979
  }
10947
10980
  }
@@ -10999,6 +11032,10 @@ parser_lex(pm_parser_t *parser) {
10999
11032
  LEX(PM_TOKEN_LESS_LESS_EQUAL);
11000
11033
  }
11001
11034
 
11035
+ if (ambiguous_operator_p(parser, space_seen)) {
11036
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11037
+ }
11038
+
11002
11039
  if (lex_state_operator_p(parser)) {
11003
11040
  lex_state_set(parser, PM_LEX_STATE_ARG);
11004
11041
  } else {
@@ -11112,6 +11149,8 @@ parser_lex(pm_parser_t *parser) {
11112
11149
  type = PM_TOKEN_UAMPERSAND;
11113
11150
  } else if (lex_state_beg_p(parser)) {
11114
11151
  type = PM_TOKEN_UAMPERSAND;
11152
+ } else if (ambiguous_operator_p(parser, space_seen)) {
11153
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11115
11154
  }
11116
11155
 
11117
11156
  if (lex_state_operator_p(parser)) {
@@ -11186,6 +11225,10 @@ parser_lex(pm_parser_t *parser) {
11186
11225
  LEX(PM_TOKEN_UPLUS);
11187
11226
  }
11188
11227
 
11228
+ if (ambiguous_operator_p(parser, space_seen)) {
11229
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11230
+ }
11231
+
11189
11232
  lex_state_set(parser, PM_LEX_STATE_BEG);
11190
11233
  LEX(PM_TOKEN_PLUS);
11191
11234
  }
@@ -11223,6 +11266,10 @@ parser_lex(pm_parser_t *parser) {
11223
11266
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11224
11267
  }
11225
11268
 
11269
+ if (ambiguous_operator_p(parser, space_seen)) {
11270
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11271
+ }
11272
+
11226
11273
  lex_state_set(parser, PM_LEX_STATE_BEG);
11227
11274
  LEX(PM_TOKEN_MINUS);
11228
11275
  }
@@ -11321,6 +11368,10 @@ parser_lex(pm_parser_t *parser) {
11321
11368
  LEX(PM_TOKEN_REGEXP_BEGIN);
11322
11369
  }
11323
11370
 
11371
+ if (ambiguous_operator_p(parser, space_seen)) {
11372
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11373
+ }
11374
+
11324
11375
  if (lex_state_operator_p(parser)) {
11325
11376
  lex_state_set(parser, PM_LEX_STATE_ARG);
11326
11377
  } else {
@@ -11356,7 +11407,7 @@ parser_lex(pm_parser_t *parser) {
11356
11407
  // operator because we don't want to move into the string
11357
11408
  // lex mode unnecessarily.
11358
11409
  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11359
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11410
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11360
11411
  LEX(PM_TOKEN_PERCENT);
11361
11412
  }
11362
11413
 
@@ -11375,10 +11426,7 @@ parser_lex(pm_parser_t *parser) {
11375
11426
 
11376
11427
  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11377
11428
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11378
-
11379
- if (parser->current.end < parser->end) {
11380
- LEX(PM_TOKEN_STRING_BEGIN);
11381
- }
11429
+ LEX(PM_TOKEN_STRING_BEGIN);
11382
11430
  }
11383
11431
 
11384
11432
  // Delimiters for %-literals cannot be alphanumeric. We
@@ -11505,6 +11553,10 @@ parser_lex(pm_parser_t *parser) {
11505
11553
  }
11506
11554
  }
11507
11555
 
11556
+ if (ambiguous_operator_p(parser, space_seen)) {
11557
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11558
+ }
11559
+
11508
11560
  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11509
11561
  LEX(PM_TOKEN_PERCENT);
11510
11562
  }
@@ -12315,9 +12367,10 @@ parser_lex(pm_parser_t *parser) {
12315
12367
 
12316
12368
  // If we are immediately following a newline and we have hit the
12317
12369
  // terminator, then we need to return the ending of the heredoc.
12318
- if (!line_continuation && current_token_starts_line(parser)) {
12370
+ if (current_token_starts_line(parser)) {
12319
12371
  const uint8_t *start = parser->current.start;
12320
- if (start + ident_length <= parser->end) {
12372
+
12373
+ if (!line_continuation && (start + ident_length <= parser->end)) {
12321
12374
  const uint8_t *newline = next_newline(start, parser->end - start);
12322
12375
  const uint8_t *ident_end = newline;
12323
12376
  const uint8_t *terminator_end = newline;
@@ -12473,11 +12526,8 @@ parser_lex(pm_parser_t *parser) {
12473
12526
  }
12474
12527
 
12475
12528
  parser->current.end = breakpoint + 1;
12476
-
12477
- if (!was_line_continuation) {
12478
- pm_token_buffer_flush(parser, &token_buffer);
12479
- LEX(PM_TOKEN_STRING_CONTENT);
12480
- }
12529
+ pm_token_buffer_flush(parser, &token_buffer);
12530
+ LEX(PM_TOKEN_STRING_CONTENT);
12481
12531
  }
12482
12532
 
12483
12533
  // Otherwise we hit a newline and it wasn't followed by
@@ -13112,11 +13162,40 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13112
13162
  return (pm_node_t *) result;
13113
13163
  }
13114
13164
 
13165
+ /**
13166
+ * When an implicit local variable is written to or targeted, it becomes a
13167
+ * regular, named local variable. This function removes the first entry equal to
13168
+ * node from the current scope's implicit parameter list, if one is present.
13169
+ */
13170
+ static void
13171
+ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13172
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13173
+
13174
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
13175
+ if (implicit_parameters->nodes[index] == node) {
13176
+ // If the node is not the last one in the list, shift the remaining
13177
+ // nodes down to fill the gap. Source and destination overlap, so
13178
+ // this must be memmove, not memcpy. This is extremely unlikely.
13179
+ if (index != implicit_parameters->size - 1) {
13180
+ memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13181
+ }
13182
+
13183
+ implicit_parameters->size--;
13184
+ break;
13185
+ }
13186
+ }
13187
+ }
13188
+
13115
13189
  /**
13116
13190
  * Convert the given node into a valid target node.
13191
+ *
13192
+ * @param multiple Whether or not this target is part of a larger set of
13193
+ * targets. If it is, then the &. operator is not allowed.
13194
+ * @param splat_parent Whether or not this target is a child of a splat target. If it
13195
+ * is, then fewer patterns are allowed.
13117
13196
  */
13118
13197
  static pm_node_t *
13119
- parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13198
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13120
13199
  switch (PM_NODE_TYPE(target)) {
13121
13200
  case PM_MISSING_NODE:
13122
13201
  return target;
@@ -13162,7 +13241,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13162
13241
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13163
13242
  return target;
13164
13243
  case PM_LOCAL_VARIABLE_READ_NODE: {
13165
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13244
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13245
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13246
+ parse_target_implicit_parameter(parser, target);
13247
+ }
13166
13248
 
13167
13249
  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13168
13250
  uint32_t name = cast->name;
@@ -13174,17 +13256,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13174
13256
 
13175
13257
  return target;
13176
13258
  }
13259
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13260
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13261
+ pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13262
+
13263
+ parse_target_implicit_parameter(parser, target);
13264
+ pm_node_destroy(parser, target);
13265
+
13266
+ return node;
13267
+ }
13177
13268
  case PM_INSTANCE_VARIABLE_READ_NODE:
13178
13269
  assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
13179
13270
  target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13180
13271
  return target;
13181
13272
  case PM_MULTI_TARGET_NODE:
13273
+ if (splat_parent) {
13274
+ // Multi target is not accepted in all positions. If this is one
13275
+ // of them, then we need to add an error.
13276
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13277
+ }
13278
+
13182
13279
  return target;
13183
13280
  case PM_SPLAT_NODE: {
13184
13281
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13185
13282
 
13186
13283
  if (splat->expression != NULL) {
13187
- splat->expression = parse_target(parser, splat->expression, multiple);
13284
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
13188
13285
  }
13189
13286
 
13190
13287
  return (pm_node_t *) splat;
@@ -13254,9 +13351,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13254
13351
  */
13255
13352
  static pm_node_t *
13256
13353
  parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13257
- pm_node_t *result = parse_target(parser, target, multiple);
13354
+ pm_node_t *result = parse_target(parser, target, multiple, false);
13258
13355
 
13259
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
13356
+ // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13357
+ // parens after the targets.
13260
13358
  if (
13261
13359
  !match1(parser, PM_TOKEN_EQUAL) &&
13262
13360
  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -13326,18 +13424,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13326
13424
  return (pm_node_t *) node;
13327
13425
  }
13328
13426
  case PM_LOCAL_VARIABLE_READ_NODE: {
13329
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
13330
13427
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
13331
13428
 
13332
13429
  pm_constant_id_t name = local_read->name;
13430
+ pm_location_t name_loc = target->location;
13431
+
13333
13432
  uint32_t depth = local_read->depth;
13334
- pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13433
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13335
13434
 
13336
- pm_location_t name_loc = target->location;
13435
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13436
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13437
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13438
+ parse_target_implicit_parameter(parser, target);
13439
+ }
13440
+
13441
+ pm_locals_unread(&scope->locals, name);
13337
13442
  pm_node_destroy(parser, target);
13338
13443
 
13339
13444
  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13340
13445
  }
13446
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13447
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13448
+ pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13449
+
13450
+ parse_target_implicit_parameter(parser, target);
13451
+ pm_node_destroy(parser, target);
13452
+
13453
+ return node;
13454
+ }
13341
13455
  case PM_INSTANCE_VARIABLE_READ_NODE: {
13342
13456
  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13343
13457
  pm_node_destroy(parser, target);
@@ -13491,7 +13605,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13491
13605
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13492
13606
 
13493
13607
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13494
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13608
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13495
13609
 
13496
13610
  while (accept1(parser, PM_TOKEN_COMMA)) {
13497
13611
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13507,7 +13621,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13507
13621
 
13508
13622
  if (token_begins_expression_p(parser->current.type)) {
13509
13623
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13510
- name = parse_target(parser, name, true);
13624
+ name = parse_target(parser, name, true, true);
13511
13625
  }
13512
13626
 
13513
13627
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13515,7 +13629,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13515
13629
  has_rest = true;
13516
13630
  } else if (token_begins_expression_p(parser->current.type)) {
13517
13631
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13518
- target = parse_target(parser, target, true);
13632
+ target = parse_target(parser, target, true, false);
13519
13633
 
13520
13634
  pm_multi_target_node_targets_append(parser, result, target);
13521
13635
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13552,8 +13666,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
13552
13666
  */
13553
13667
  static pm_statements_node_t *
13554
13668
  parse_statements(pm_parser_t *parser, pm_context_t context) {
13555
- // First, skip past any optional terminators that might be at the beginning of
13556
- // the statements.
13669
+ // First, skip past any optional terminators that might be at the beginning
13670
+ // of the statements.
13557
13671
  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13558
13672
 
13559
13673
  // If we have a terminator, then we can just return NULL.
@@ -13569,20 +13683,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13569
13683
  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13570
13684
  pm_statements_node_body_append(parser, statements, node);
13571
13685
 
13572
- // If we're recovering from a syntax error, then we need to stop parsing the
13573
- // statements now.
13686
+ // If we're recovering from a syntax error, then we need to stop parsing
13687
+ // the statements now.
13574
13688
  if (parser->recovering) {
13575
- // If this is the level of context where the recovery has happened, then
13576
- // we can mark the parser as done recovering.
13689
+ // If this is the level of context where the recovery has happened,
13690
+ // then we can mark the parser as done recovering.
13577
13691
  if (context_terminator(context, &parser->current)) parser->recovering = false;
13578
13692
  break;
13579
13693
  }
13580
13694
 
13581
- // If we have a terminator, then we will parse all consecutive terminators
13582
- // and then continue parsing the statements list.
13695
+ // If we have a terminator, then we will parse all consecutive
13696
+ // terminators and then continue parsing the statements list.
13583
13697
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13584
- // If we have a terminator, then we will continue parsing the statements
13585
- // list.
13698
+ // If we have a terminator, then we will continue parsing the
13699
+ // statements list.
13586
13700
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13587
13701
  if (context_terminator(context, &parser->current)) break;
13588
13702
 
@@ -13590,27 +13704,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13590
13704
  continue;
13591
13705
  }
13592
13706
 
13593
- // At this point we have a list of statements that are not terminated by a
13594
- // newline or semicolon. At this point we need to check if we're at the end
13595
- // of the statements list. If we are, then we should break out of the loop.
13707
+ // At this point we have a list of statements that are not terminated by
13708
+ // a newline or semicolon. At this point we need to check if we're at
13709
+ // the end of the statements list. If we are, then we should break out
13710
+ // of the loop.
13596
13711
  if (context_terminator(context, &parser->current)) break;
13597
13712
 
13598
13713
  // At this point, we have a syntax error, because the statement was not
13599
13714
  // terminated by a newline or semicolon, and we're not at the end of the
13600
- // statements list. Ideally we should scan forward to determine if we should
13601
- // insert a missing terminator or break out of parsing the statements list
13602
- // at this point.
13715
+ // statements list. Ideally we should scan forward to determine if we
13716
+ // should insert a missing terminator or break out of parsing the
13717
+ // statements list at this point.
13603
13718
  //
13604
- // We don't have that yet, so instead we'll do a more naive approach. If we
13605
- // were unable to parse an expression, then we will skip past this token and
13606
- // continue parsing the statements list. Otherwise we'll add an error and
13607
- // continue parsing the statements list.
13719
+ // We don't have that yet, so instead we'll do a more naive approach. If
13720
+ // we were unable to parse an expression, then we will skip past this
13721
+ // token and continue parsing the statements list. Otherwise we'll add
13722
+ // an error and continue parsing the statements list.
13608
13723
  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13609
13724
  parser_lex(parser);
13610
13725
 
13611
13726
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13612
13727
  if (context_terminator(context, &parser->current)) break;
13613
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
13728
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13614
13729
  // This is an inlined version of accept1 because the error that we
13615
13730
  // want to add has varargs. If this happens again, we should
13616
13731
  // probably extract a helper function.
@@ -13632,7 +13747,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13632
13747
  */
13633
13748
  static void
13634
13749
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13635
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13750
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13636
13751
 
13637
13752
  if (duplicated != NULL) {
13638
13753
  pm_buffer_t buffer = { 0 };
@@ -13658,13 +13773,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13658
13773
  */
13659
13774
  static void
13660
13775
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13661
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13776
+ pm_node_t *previous;
13777
+
13778
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13662
13779
  pm_diagnostic_list_append_format(
13663
13780
  &parser->warning_list,
13664
13781
  node->location.start,
13665
13782
  node->location.end,
13666
13783
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13667
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13784
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13785
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13668
13786
  );
13669
13787
  }
13670
13788
  }
@@ -14276,7 +14394,7 @@ parse_parameters(
14276
14394
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14277
14395
 
14278
14396
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14279
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14397
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14280
14398
 
14281
14399
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
14282
14400
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
@@ -14289,7 +14407,7 @@ parse_parameters(
14289
14407
  // If the value of the parameter increased the number of
14290
14408
  // reads of that parameter, then we need to warn that we
14291
14409
  // have a circular definition.
14292
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14410
+ if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14293
14411
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14294
14412
  }
14295
14413
 
@@ -14368,10 +14486,10 @@ parse_parameters(
14368
14486
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14369
14487
 
14370
14488
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14371
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
14489
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14372
14490
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
14373
14491
 
14374
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
14492
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14375
14493
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14376
14494
  }
14377
14495
 
@@ -14543,7 +14661,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14543
14661
  pm_rescue_node_operator_set(rescue, &parser->previous);
14544
14662
 
14545
14663
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14546
- reference = parse_target(parser, reference, false);
14664
+ reference = parse_target(parser, reference, false, false);
14547
14665
 
14548
14666
  pm_rescue_node_reference_set(rescue, reference);
14549
14667
  break;
@@ -14573,7 +14691,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14573
14691
  pm_rescue_node_operator_set(rescue, &parser->previous);
14574
14692
 
14575
14693
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14576
- reference = parse_target(parser, reference, false);
14694
+ reference = parse_target(parser, reference, false, false);
14577
14695
 
14578
14696
  pm_rescue_node_reference_set(rescue, reference);
14579
14697
  break;
@@ -14778,6 +14896,28 @@ parse_block_parameters(
14778
14896
  return block_parameters;
14779
14897
  }
14780
14898
 
14899
+ /**
14900
+ * Return true if any scope enclosing the current one, up to but excluding the
14901
+ * nearest closed scope, is flagged PM_SCOPE_PARAMETERS_NUMBERED_FOUND.
14902
+ */
14903
+ static bool
14904
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14905
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14906
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14907
+ }
14908
+
14909
+ return false;
14910
+ }
14911
+
14912
+ /**
14913
+ * These are the names of the various numbered parameters. We have them here so
14914
+ * that when we insert them into the constant pool we can use a constant string
14915
+ * and not have to allocate. The entry at index n is the name of parameter n + 1.
14916
+ */
14917
+ static const char * const pm_numbered_parameter_names[] = {
14918
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
14919
+ };
14920
+
14781
14921
  /**
14782
14922
  * Return the node that should be used in the parameters field of a block-like
14783
14923
  * (block or lambda) node, depending on the kind of parameters that were
@@ -14785,31 +14925,79 @@ parse_block_parameters(
14785
14925
  */
14786
14926
  static pm_node_t *
14787
14927
  parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14788
- uint8_t masked = parser->current_scope->parameters & PM_SCOPE_PARAMETERS_TYPE_MASK;
14928
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14929
+
14930
+ // If we have ordinary parameters, then we will return them as the set of
14931
+ // parameters.
14932
+ if (parameters != NULL) {
14933
+ // If we also have implicit parameters, then this is an error.
14934
+ if (implicit_parameters->size > 0) {
14935
+ pm_node_t *node = implicit_parameters->nodes[0];
14936
+
14937
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14938
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14939
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14940
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14941
+ } else {
14942
+ assert(false && "unreachable");
14943
+ }
14944
+ }
14789
14945
 
14790
- if (masked == PM_SCOPE_PARAMETERS_NONE) {
14791
- assert(parameters == NULL);
14792
- return NULL;
14793
- } else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
14794
- assert(parameters != NULL);
14795
14946
  return parameters;
14796
- } else if (masked == PM_SCOPE_PARAMETERS_NUMBERED) {
14797
- assert(parameters == NULL);
14947
+ }
14948
+
14949
+ // If we don't have any implicit parameters, then the set of parameters is
14950
+ // NULL.
14951
+ if (implicit_parameters->size == 0) {
14952
+ return NULL;
14953
+ }
14954
+
14955
+ // If we don't have ordinary parameters, then we now must validate our set
14956
+ // of implicit parameters. We can only have numbered parameters or it, but
14957
+ // they cannot be mixed.
14958
+ uint8_t numbered_parameter = 0;
14959
+ bool it_parameter = false;
14960
+
14961
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
14962
+ pm_node_t *node = implicit_parameters->nodes[index];
14963
+
14964
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14965
+ if (it_parameter) {
14966
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14967
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
14968
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14969
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14970
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14971
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14972
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14973
+ } else {
14974
+ assert(false && "unreachable");
14975
+ }
14976
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14977
+ if (numbered_parameter > 0) {
14978
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14979
+ } else {
14980
+ it_parameter = true;
14981
+ }
14982
+ }
14983
+ }
14798
14984
 
14799
- int8_t maximum = parser->current_scope->numbered_parameters;
14800
- if (maximum > 0) {
14801
- const pm_location_t location = { .start = opening->start, .end = closing->end };
14802
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, (uint8_t) maximum);
14985
+ if (numbered_parameter > 0) {
14986
+ // Go through the parent scopes and mark them as being disallowed from
14987
+ // using numbered parameters because this inner scope is using them.
14988
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14989
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14803
14990
  }
14804
14991
 
14805
- return NULL;
14806
- } else if (masked == PM_SCOPE_PARAMETERS_IT) {
14807
- assert(parameters == NULL);
14992
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
14993
+ return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
14994
+ }
14995
+
14996
+ if (it_parameter) {
14808
14997
  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
14809
- } else {
14810
- assert(false && "unreachable");
14811
- return NULL;
14812
14998
  }
14999
+
15000
+ return NULL;
14813
15001
  }
14814
15002
 
14815
15003
  /**
@@ -14826,9 +15014,6 @@ parse_block(pm_parser_t *parser) {
14826
15014
  pm_block_parameters_node_t *block_parameters = NULL;
14827
15015
 
14828
15016
  if (accept1(parser, PM_TOKEN_PIPE)) {
14829
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
14830
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
14831
-
14832
15017
  pm_token_t block_parameters_opening = parser->previous;
14833
15018
  if (match1(parser, PM_TOKEN_PIPE)) {
14834
15019
  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
@@ -15326,7 +15511,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
15326
15511
  #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15327
15512
  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15328
15513
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15329
- case PM_NUMBERED_REFERENCE_READ_NODE
15514
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15330
15515
 
15331
15516
  // Assert here that the flags are the same so that we can safely switch the type
15332
15517
  // of the node without having to move the flags.
@@ -15384,6 +15569,10 @@ parse_string_part(pm_parser_t *parser) {
15384
15569
  // "aaa #{bbb} #@ccc ddd"
15385
15570
  // ^^^^^^
15386
15571
  case PM_TOKEN_EMBEXPR_BEGIN: {
15572
+ // Ruby disallows seeing encoding around interpolation in strings,
15573
+ // even though it is known at parse time.
15574
+ parser->explicit_encoding = NULL;
15575
+
15387
15576
  pm_lex_state_t state = parser->lex_state;
15388
15577
  int brace_nesting = parser->brace_nesting;
15389
15578
 
@@ -15406,6 +15595,13 @@ parse_string_part(pm_parser_t *parser) {
15406
15595
  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15407
15596
  pm_token_t closing = parser->previous;
15408
15597
 
15598
+ // If this set of embedded statements only contains a single
15599
+ // statement, then Ruby does not consider it as a possible statement
15600
+ // that could emit a line event.
15601
+ if (statements != NULL && statements->body.size == 1) {
15602
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15603
+ }
15604
+
15409
15605
  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
15410
15606
  }
15411
15607
 
@@ -15416,6 +15612,10 @@ parse_string_part(pm_parser_t *parser) {
15416
15612
  // "aaa #{bbb} #@ccc ddd"
15417
15613
  // ^^^^^
15418
15614
  case PM_TOKEN_EMBVAR: {
15615
+ // Ruby disallows seeing encoding around interpolation in strings,
15616
+ // even though it is known at parse time.
15617
+ parser->explicit_encoding = NULL;
15618
+
15419
15619
  lex_state_set(parser, PM_LEX_STATE_BEG);
15420
15620
  parser_lex(parser);
15421
15621
 
@@ -15731,74 +15931,43 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15731
15931
  }
15732
15932
 
15733
15933
  /**
15734
- * Return true if any of the visible scopes to the current context are using
15735
- * numbered parameters.
15934
+ * Parse an identifier into either a local variable read. If the local variable
15935
+ * is not found, it returns NULL instead.
15736
15936
  */
15737
- static bool
15738
- outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15739
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15740
- if (scope->numbered_parameters > 0) return true;
15741
- }
15937
+ static pm_node_t *
15938
+ parse_variable(pm_parser_t *parser) {
15939
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15940
+ int depth;
15742
15941
 
15743
- return false;
15744
- }
15745
-
15746
- /**
15747
- * These are the names of the various numbered parameters. We have them here so
15748
- * that when we insert them into the constant pool we can use a constant string
15749
- * and not have to allocate.
15750
- */
15751
- static const char * const pm_numbered_parameter_names[] = {
15752
- "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
15753
- };
15754
-
15755
- /**
15756
- * Parse an identifier into either a local variable read. If the local variable
15757
- * is not found, it returns NULL instead.
15758
- */
15759
- static pm_local_variable_read_node_t *
15760
- parse_variable(pm_parser_t *parser) {
15761
- int depth;
15762
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
15763
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
15764
- }
15942
+ if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
15943
+ return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
15944
+ }
15765
15945
 
15766
15946
  pm_scope_t *current_scope = parser->current_scope;
15767
- if (!current_scope->closed && current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15768
- // Now that we know we have a numbered parameter, we need to check
15769
- // if it's allowed in this context. If it is, then we will create a
15770
- // local variable read. If it's not, then we'll create a normal call
15771
- // node but add an error.
15772
- if (current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
15773
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15774
- } else if (current_scope->parameters & PM_SCOPE_PARAMETERS_IT) {
15775
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_IT);
15776
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
15777
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
15778
- } else {
15779
- // Indicate that this scope is using numbered params so that child
15780
- // scopes cannot. We subtract the value for the character '0' to get
15781
- // the actual integer value of the number (only _1 through _9 are
15782
- // valid).
15783
- int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
15784
- current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
15785
-
15786
- if (numbered_parameters > current_scope->numbered_parameters) {
15787
- current_scope->numbered_parameters = numbered_parameters;
15947
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15948
+ if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
15949
+ // When you use a numbered parameter, it implies the existence of
15950
+ // all of the locals that exist before it. For example, referencing
15951
+ // _2 means that _1 must exist. Therefore here we loop through all
15952
+ // of the possibilities and add them into the constant pool.
15953
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15954
+ for (uint8_t number = 1; number <= maximum; number++) {
15955
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15788
15956
  }
15789
15957
 
15790
- // When you use a numbered parameter, it implies the existence
15791
- // of all of the locals that exist before it. For example,
15792
- // referencing _2 means that _1 must exist. Therefore here we
15793
- // loop through all of the possibilities and add them into the
15794
- // constant pool.
15795
- for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
15796
- pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
15958
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
15959
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15797
15960
  }
15798
15961
 
15799
- // Finally we can create the local variable read node.
15800
- pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
15801
- return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15962
+ pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
15963
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15964
+
15965
+ return node;
15966
+ } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15967
+ pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
15968
+ pm_node_list_append(&current_scope->implicit_parameters, node);
15969
+
15970
+ return node;
15802
15971
  }
15803
15972
  }
15804
15973
 
@@ -15813,8 +15982,8 @@ parse_variable_call(pm_parser_t *parser) {
15813
15982
  pm_node_flags_t flags = 0;
15814
15983
 
15815
15984
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15816
- pm_local_variable_read_node_t *node = parse_variable(parser);
15817
- if (node != NULL) return (pm_node_t *) node;
15985
+ pm_node_t *node = parse_variable(parser);
15986
+ if (node != NULL) return node;
15818
15987
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15819
15988
  }
15820
15989
 
@@ -15932,6 +16101,230 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15932
16101
  nodes->size = write_index;
15933
16102
  }
15934
16103
 
16104
+ /**
16105
+ * Return a string content token at a particular location that is empty.
16106
+ */
16107
+ static pm_token_t
16108
+ parse_strings_empty_content(const uint8_t *location) {
16109
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16110
+ }
16111
+
16112
+ /**
16113
+ * Parse a set of strings that could be concatenated together.
16114
+ */
16115
+ static inline pm_node_t *
16116
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16117
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16118
+
16119
+ bool concating = false;
16120
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16121
+
16122
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16123
+ pm_node_t *node = NULL;
16124
+
16125
+ // Here we have found a string literal. We'll parse it and add it to
16126
+ // the list of strings.
16127
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16128
+ assert(lex_mode->mode == PM_LEX_STRING);
16129
+ bool lex_interpolation = lex_mode->as.string.interpolation;
16130
+
16131
+ pm_token_t opening = parser->current;
16132
+ parser_lex(parser);
16133
+
16134
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16135
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16136
+ // If we get here, then we have an end immediately after a
16137
+ // start. In that case we'll create an empty content token and
16138
+ // return an uninterpolated string.
16139
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16140
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16141
+
16142
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
16143
+ node = (pm_node_t *) string;
16144
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16145
+ // If we get here, then we have an end of a label immediately
16146
+ // after a start. In that case we'll create an empty symbol
16147
+ // node.
16148
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
16149
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16150
+
16151
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16152
+ node = (pm_node_t *) symbol;
16153
+ } else if (!lex_interpolation) {
16154
+ // If we don't accept interpolation then we expect the string to
16155
+ // start with a single string content node.
16156
+ pm_string_t unescaped;
16157
+ pm_token_t content;
16158
+
16159
+ if (match1(parser, PM_TOKEN_EOF)) {
16160
+ unescaped = PM_STRING_EMPTY;
16161
+ content = not_provided(parser);
16162
+ } else {
16163
+ unescaped = parser->current_string;
16164
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16165
+ content = parser->previous;
16166
+ }
16167
+
16168
+ // It is unfortunately possible to have multiple string content
16169
+ // nodes in a row in the case that there's heredoc content in
16170
+ // the middle of the string, like this cursed example:
16171
+ //
16172
+ // <<-END+'b
16173
+ // a
16174
+ // END
16175
+ // c'+'d'
16176
+ //
16177
+ // In that case we need to switch to an interpolated string to
16178
+ // be able to contain all of the parts.
16179
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16180
+ pm_node_list_t parts = { 0 };
16181
+
16182
+ pm_token_t delimiters = not_provided(parser);
16183
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16184
+ pm_node_list_append(&parts, part);
16185
+
16186
+ do {
16187
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16188
+ pm_node_list_append(&parts, part);
16189
+ parser_lex(parser);
16190
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16191
+
16192
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16193
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16194
+
16195
+ pm_node_list_free(&parts);
16196
+ } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16197
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16198
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16199
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16200
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16201
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16202
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16203
+ } else {
16204
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16205
+ parser->previous.start = parser->previous.end;
16206
+ parser->previous.type = PM_TOKEN_MISSING;
16207
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16208
+ }
16209
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16210
+ // In this case we've hit string content so we know the string
16211
+ // at least has something in it. We'll need to check if the
16212
+ // following token is the end (in which case we can return a
16213
+ // plain string) or if it's not then it has interpolation.
16214
+ pm_token_t content = parser->current;
16215
+ pm_string_t unescaped = parser->current_string;
16216
+ parser_lex(parser);
16217
+
16218
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16219
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16220
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
16221
+
16222
+ // Kind of odd behavior, but basically if we have an
16223
+ // unterminated string and it ends in a newline, we back up one
16224
+ // character so that the error message is on the last line of
16225
+ // content in the string.
16226
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
16227
+ const uint8_t *location = parser->previous.end;
16228
+ if (location > parser->start && location[-1] == '\n') location--;
16229
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16230
+
16231
+ parser->previous.start = parser->previous.end;
16232
+ parser->previous.type = PM_TOKEN_MISSING;
16233
+ }
16234
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16235
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16236
+ } else {
16237
+ // If we get here, then we have interpolation so we'll need
16238
+ // to create a string or symbol node with interpolation.
16239
+ pm_node_list_t parts = { 0 };
16240
+ pm_token_t string_opening = not_provided(parser);
16241
+ pm_token_t string_closing = not_provided(parser);
16242
+
16243
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16244
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
16245
+ pm_node_list_append(&parts, part);
16246
+
16247
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16248
+ if ((part = parse_string_part(parser)) != NULL) {
16249
+ pm_node_list_append(&parts, part);
16250
+ }
16251
+ }
16252
+
16253
+ if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16254
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16255
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16256
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16257
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16258
+ } else {
16259
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16260
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16261
+ }
16262
+
16263
+ pm_node_list_free(&parts);
16264
+ }
16265
+ } else {
16266
+ // If we get here, then the first part of the string is not plain
16267
+ // string content, in which case we need to parse the string as an
16268
+ // interpolated string.
16269
+ pm_node_list_t parts = { 0 };
16270
+ pm_node_t *part;
16271
+
16272
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16273
+ if ((part = parse_string_part(parser)) != NULL) {
16274
+ pm_node_list_append(&parts, part);
16275
+ }
16276
+ }
16277
+
16278
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
16279
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16280
+ } else if (match1(parser, PM_TOKEN_EOF)) {
16281
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16282
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16283
+ } else {
16284
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16285
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16286
+ }
16287
+
16288
+ pm_node_list_free(&parts);
16289
+ }
16290
+
16291
+ if (current == NULL) {
16292
+ // If the node we just parsed is a symbol node, then we can't
16293
+ // concatenate it with anything else, so we can now return that
16294
+ // node.
16295
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16296
+ return node;
16297
+ }
16298
+
16299
+ // If we don't already have a node, then it's fine and we can just
16300
+ // set the result to be the node we just parsed.
16301
+ current = node;
16302
+ } else {
16303
+ // Otherwise we need to check the type of the node we just parsed.
16304
+ // If it cannot be concatenated with the previous node, then we'll
16305
+ // need to add a syntax error.
16306
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16307
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16308
+ }
16309
+
16310
+ // If we haven't already created our container for concatenation,
16311
+ // we'll do that now.
16312
+ if (!concating) {
16313
+ concating = true;
16314
+ pm_token_t bounds = not_provided(parser);
16315
+
16316
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16317
+ pm_interpolated_string_node_append(container, current);
16318
+ current = (pm_node_t *) container;
16319
+ }
16320
+
16321
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16322
+ }
16323
+ }
16324
+
16325
+ return current;
16326
+ }
16327
+
15935
16328
  #define PM_PARSE_PATTERN_SINGLE 0
15936
16329
  #define PM_PARSE_PATTERN_TOP 1
15937
16330
  #define PM_PARSE_PATTERN_MULTI 2
@@ -16214,7 +16607,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
16214
16607
  */
16215
16608
  static void
16216
16609
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16217
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
16610
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16218
16611
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16219
16612
  }
16220
16613
  }
@@ -16289,8 +16682,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
16289
16682
  pm_node_list_append(&assocs, assoc);
16290
16683
  }
16291
16684
  } else {
16292
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16293
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16685
+ pm_node_t *key;
16686
+
16687
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16688
+ key = parse_strings(parser, NULL);
16689
+
16690
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16691
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16692
+ } else if (!pm_symbol_node_label_p(key)) {
16693
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16694
+ }
16695
+ } else {
16696
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16697
+ key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16698
+ }
16294
16699
 
16295
16700
  parse_pattern_hash_key(parser, &keys, key);
16296
16701
  pm_node_t *value = NULL;
@@ -16502,19 +16907,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16502
16907
  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
16503
16908
 
16504
16909
  if (variable == NULL) {
16505
- if (
16506
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
16507
- !parser->current_scope->closed &&
16508
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
16509
- pm_token_is_it(parser->previous.start, parser->previous.end)
16510
- ) {
16511
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
16512
- if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
16513
- variable = (pm_node_t *) read;
16514
- } else {
16515
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16516
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16517
- }
16910
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16911
+ variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
16518
16912
  }
16519
16913
 
16520
16914
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
@@ -16762,276 +17156,67 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
16762
17156
  }
16763
17157
 
16764
17158
  trailing_rest = true;
16765
- } else {
16766
- node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
16767
- }
16768
-
16769
- pm_node_list_append(&nodes, node);
16770
- }
16771
-
16772
- // If the first pattern and the last pattern are rest patterns, then we will
16773
- // call this a find pattern, regardless of how many rest patterns are in
16774
- // between because we know we already added the appropriate errors.
16775
- // Otherwise we will create an array pattern.
16776
- if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
16777
- node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
16778
- } else {
16779
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
16780
- }
16781
-
16782
- xfree(nodes.nodes);
16783
- } else if (leading_rest) {
16784
- // Otherwise, if we parsed a single splat pattern, then we know we have an
16785
- // array pattern, so we can go ahead and create that node.
16786
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
16787
- }
16788
-
16789
- return node;
16790
- }
16791
-
16792
- /**
16793
- * Incorporate a negative sign into a numeric node by subtracting 1 character
16794
- * from its start bounds. If it's a compound node, then we will recursively
16795
- * apply this function to its value.
16796
- */
16797
- static inline void
16798
- parse_negative_numeric(pm_node_t *node) {
16799
- switch (PM_NODE_TYPE(node)) {
16800
- case PM_INTEGER_NODE: {
16801
- pm_integer_node_t *cast = (pm_integer_node_t *) node;
16802
- cast->base.location.start--;
16803
- cast->value.negative = true;
16804
- break;
16805
- }
16806
- case PM_FLOAT_NODE: {
16807
- pm_float_node_t *cast = (pm_float_node_t *) node;
16808
- cast->base.location.start--;
16809
- cast->value = -cast->value;
16810
- break;
16811
- }
16812
- case PM_RATIONAL_NODE:
16813
- node->location.start--;
16814
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
16815
- break;
16816
- case PM_IMAGINARY_NODE:
16817
- node->location.start--;
16818
- parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
16819
- break;
16820
- default:
16821
- assert(false && "unreachable");
16822
- break;
16823
- }
16824
- }
16825
-
16826
- /**
16827
- * Return a string content token at a particular location that is empty.
16828
- */
16829
- static pm_token_t
16830
- parse_strings_empty_content(const uint8_t *location) {
16831
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16832
- }
16833
-
16834
- /**
16835
- * Parse a set of strings that could be concatenated together.
16836
- */
16837
- static inline pm_node_t *
16838
- parse_strings(pm_parser_t *parser, pm_node_t *current) {
16839
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16840
-
16841
- bool concating = false;
16842
- bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
16843
-
16844
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16845
- pm_node_t *node = NULL;
16846
-
16847
- // Here we have found a string literal. We'll parse it and add it to
16848
- // the list of strings.
16849
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16850
- assert(lex_mode->mode == PM_LEX_STRING);
16851
- bool lex_interpolation = lex_mode->as.string.interpolation;
16852
-
16853
- pm_token_t opening = parser->current;
16854
- parser_lex(parser);
16855
-
16856
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16857
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16858
- // If we get here, then we have an end immediately after a
16859
- // start. In that case we'll create an empty content token and
16860
- // return an uninterpolated string.
16861
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16862
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16863
-
16864
- pm_string_shared_init(&string->unescaped, content.start, content.end);
16865
- node = (pm_node_t *) string;
16866
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16867
- // If we get here, then we have an end of a label immediately
16868
- // after a start. In that case we'll create an empty symbol
16869
- // node.
16870
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
16871
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16872
-
16873
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16874
- node = (pm_node_t *) symbol;
16875
- } else if (!lex_interpolation) {
16876
- // If we don't accept interpolation then we expect the string to
16877
- // start with a single string content node.
16878
- pm_string_t unescaped;
16879
- pm_token_t content;
16880
- if (match1(parser, PM_TOKEN_EOF)) {
16881
- unescaped = PM_STRING_EMPTY;
16882
- content = not_provided(parser);
16883
- } else {
16884
- unescaped = parser->current_string;
16885
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16886
- content = parser->previous;
16887
- }
16888
-
16889
- // It is unfortunately possible to have multiple string content
16890
- // nodes in a row in the case that there's heredoc content in
16891
- // the middle of the string, like this cursed example:
16892
- //
16893
- // <<-END+'b
16894
- // a
16895
- // END
16896
- // c'+'d'
16897
- //
16898
- // In that case we need to switch to an interpolated string to
16899
- // be able to contain all of the parts.
16900
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16901
- pm_node_list_t parts = { 0 };
16902
-
16903
- pm_token_t delimiters = not_provided(parser);
16904
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16905
- pm_node_list_append(&parts, part);
16906
-
16907
- do {
16908
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16909
- pm_node_list_append(&parts, part);
16910
- parser_lex(parser);
16911
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16912
-
16913
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16914
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16915
-
16916
- pm_node_list_free(&parts);
16917
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16918
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16919
- } else if (match1(parser, PM_TOKEN_EOF)) {
16920
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16921
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16922
- } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16923
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16924
- } else {
16925
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16926
- parser->previous.start = parser->previous.end;
16927
- parser->previous.type = PM_TOKEN_MISSING;
16928
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16929
- }
16930
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16931
- // In this case we've hit string content so we know the string
16932
- // at least has something in it. We'll need to check if the
16933
- // following token is the end (in which case we can return a
16934
- // plain string) or if it's not then it has interpolation.
16935
- pm_token_t content = parser->current;
16936
- pm_string_t unescaped = parser->current_string;
16937
- parser_lex(parser);
16938
-
16939
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16940
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16941
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
16942
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16943
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16944
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16945
- } else {
16946
- // If we get here, then we have interpolation so we'll need
16947
- // to create a string or symbol node with interpolation.
16948
- pm_node_list_t parts = { 0 };
16949
- pm_token_t string_opening = not_provided(parser);
16950
- pm_token_t string_closing = not_provided(parser);
16951
-
16952
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16953
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
16954
- pm_node_list_append(&parts, part);
16955
-
16956
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16957
- if ((part = parse_string_part(parser)) != NULL) {
16958
- pm_node_list_append(&parts, part);
16959
- }
16960
- }
16961
-
16962
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16963
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16964
- } else if (match1(parser, PM_TOKEN_EOF)) {
16965
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16966
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16967
- } else {
16968
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16969
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16970
- }
16971
-
16972
- pm_node_list_free(&parts);
16973
- }
16974
- } else {
16975
- // If we get here, then the first part of the string is not plain
16976
- // string content, in which case we need to parse the string as an
16977
- // interpolated string.
16978
- pm_node_list_t parts = { 0 };
16979
- pm_node_t *part;
16980
-
16981
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16982
- if ((part = parse_string_part(parser)) != NULL) {
16983
- pm_node_list_append(&parts, part);
16984
- }
16985
- }
16986
-
16987
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
16988
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16989
- } else if (match1(parser, PM_TOKEN_EOF)) {
16990
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16991
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16992
- } else {
16993
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16994
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
17159
+ } else {
17160
+ node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
16995
17161
  }
16996
17162
 
16997
- pm_node_list_free(&parts);
17163
+ pm_node_list_append(&nodes, node);
16998
17164
  }
16999
17165
 
17000
- if (current == NULL) {
17001
- // If the node we just parsed is a symbol node, then we can't
17002
- // concatenate it with anything else, so we can now return that
17003
- // node.
17004
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
17005
- return node;
17006
- }
17007
-
17008
- // If we don't already have a node, then it's fine and we can just
17009
- // set the result to be the node we just parsed.
17010
- current = node;
17166
+ // If the first pattern and the last pattern are rest patterns, then we will
17167
+ // call this a find pattern, regardless of how many rest patterns are in
17168
+ // between because we know we already added the appropriate errors.
17169
+ // Otherwise we will create an array pattern.
17170
+ if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17171
+ node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17011
17172
  } else {
17012
- // Otherwise we need to check the type of the node we just parsed.
17013
- // If it cannot be concatenated with the previous node, then we'll
17014
- // need to add a syntax error.
17015
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
17016
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
17017
- }
17173
+ node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17174
+ }
17018
17175
 
17019
- // If we haven't already created our container for concatenation,
17020
- // we'll do that now.
17021
- if (!concating) {
17022
- concating = true;
17023
- pm_token_t bounds = not_provided(parser);
17176
+ xfree(nodes.nodes);
17177
+ } else if (leading_rest) {
17178
+ // Otherwise, if we parsed a single splat pattern, then we know we have an
17179
+ // array pattern, so we can go ahead and create that node.
17180
+ node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17181
+ }
17024
17182
 
17025
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
17026
- pm_interpolated_string_node_append(container, current);
17027
- current = (pm_node_t *) container;
17028
- }
17183
+ return node;
17184
+ }
17029
17185
 
17030
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
17186
+ /**
17187
+ * Incorporate a negative sign into a numeric node by subtracting 1 character
17188
+ * from its start bounds. If it's a compound node, then we will recursively
17189
+ * apply this function to its value.
17190
+ */
17191
+ static inline void
17192
+ parse_negative_numeric(pm_node_t *node) {
17193
+ switch (PM_NODE_TYPE(node)) {
17194
+ case PM_INTEGER_NODE: {
17195
+ pm_integer_node_t *cast = (pm_integer_node_t *) node;
17196
+ cast->base.location.start--;
17197
+ cast->value.negative = true;
17198
+ break;
17199
+ }
17200
+ case PM_FLOAT_NODE: {
17201
+ pm_float_node_t *cast = (pm_float_node_t *) node;
17202
+ cast->base.location.start--;
17203
+ cast->value = -cast->value;
17204
+ break;
17205
+ }
17206
+ case PM_RATIONAL_NODE: {
17207
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
17208
+ cast->base.location.start--;
17209
+ cast->numerator.negative = true;
17210
+ break;
17031
17211
  }
17212
+ case PM_IMAGINARY_NODE:
17213
+ node->location.start--;
17214
+ parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17215
+ break;
17216
+ default:
17217
+ assert(false && "unreachable");
17218
+ break;
17032
17219
  }
17033
-
17034
- return current;
17035
17220
  }
17036
17221
 
17037
17222
  /**
@@ -17229,6 +17414,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17229
17414
  }
17230
17415
  }
17231
17416
 
17417
+ /**
17418
+ * This struct is used to pass information between the regular expression parser
17419
+ * and the error callback.
17420
+ */
17421
+ typedef struct {
17422
+ /** The parser that we are parsing the regular expression for. */
17423
+ pm_parser_t *parser;
17424
+
17425
+ /** The start of the regular expression. */
17426
+ const uint8_t *start;
17427
+
17428
+ /** The end of the regular expression. */
17429
+ const uint8_t *end;
17430
+
17431
+ /**
17432
+ * Whether or not the source of the regular expression is shared. This
17433
+ * impacts the location of error messages, because if it is shared then we
17434
+ * can use the location directly and if it is not, then we use the bounds of
17435
+ * the regular expression itself.
17436
+ */
17437
+ bool shared;
17438
+ } parse_regular_expression_error_data_t;
17439
+
17440
+ /**
17441
+ * This callback is called when the regular expression parser encounters a
17442
+ * syntax error.
17443
+ */
17444
+ static void
17445
+ parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17446
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
17447
+ pm_location_t location;
17448
+
17449
+ if (callback_data->shared) {
17450
+ location = (pm_location_t) { .start = start, .end = end };
17451
+ } else {
17452
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17453
+ }
17454
+
17455
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17456
+ }
17457
+
17458
+ /**
17459
+ * Parse the errors for the regular expression and add them to the parser.
17460
+ */
17461
+ static void
17462
+ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17463
+ const pm_string_t *unescaped = &node->unescaped;
17464
+ parse_regular_expression_error_data_t error_data = {
17465
+ .parser = parser,
17466
+ .start = node->base.location.start,
17467
+ .end = node->base.location.end,
17468
+ .shared = unescaped->type == PM_STRING_SHARED
17469
+ };
17470
+
17471
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
17472
+ }
17473
+
17232
17474
  /**
17233
17475
  * Parse an expression that begins with the previous node that we just lexed.
17234
17476
  */
@@ -17249,8 +17491,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17249
17491
  break;
17250
17492
  }
17251
17493
 
17252
- if (pm_array_node_size(array) != 0) {
17253
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
17494
+ // Ensure that we have a comma between elements in the array.
17495
+ if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17496
+ const uint8_t *location = parser->previous.end;
17497
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17498
+
17499
+ parser->previous.start = location;
17500
+ parser->previous.type = PM_TOKEN_MISSING;
17254
17501
  }
17255
17502
 
17256
17503
  // If we have a right bracket immediately following a comma,
@@ -17428,7 +17675,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17428
17675
 
17429
17676
  // If we didn't find a terminator and we didn't find a right
17430
17677
  // parenthesis, then this is a syntax error.
17431
- if (!terminator_found) {
17678
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17432
17679
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17433
17680
  }
17434
17681
 
@@ -17457,7 +17704,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17457
17704
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17458
17705
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17459
17706
  break;
17460
- } else {
17707
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
17708
+ // If we're at the end of the file, then we're going to add
17709
+ // an error after this for the ) anyway.
17461
17710
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17462
17711
  }
17463
17712
  }
@@ -17676,8 +17925,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17676
17925
  ) {
17677
17926
  pm_arguments_t arguments = { 0 };
17678
17927
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
17679
-
17680
17928
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17929
+
17930
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17931
+ // If we're about to convert an 'it' implicit local
17932
+ // variable read into a method call, we need to remove
17933
+ // it from the list of implicit local variables.
17934
+ parse_target_implicit_parameter(parser, node);
17935
+ } else {
17936
+ // Otherwise, we're about to convert a regular local
17937
+ // variable read into a method call, in which case we
17938
+ // need to indicate that this was not a read for the
17939
+ // purposes of warnings.
17940
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17941
+
17942
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17943
+ parse_target_implicit_parameter(parser, node);
17944
+ } else {
17945
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
17946
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17947
+ }
17948
+ }
17949
+
17681
17950
  pm_node_destroy(parser, node);
17682
17951
  return (pm_node_t *) fcall;
17683
17952
  }
@@ -17685,31 +17954,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17685
17954
 
17686
17955
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17687
17956
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
17688
- } else {
17689
- // Check if `it` is not going to be assigned.
17690
- switch (parser->current.type) {
17691
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
17692
- case PM_TOKEN_AMPERSAND_EQUAL:
17693
- case PM_TOKEN_CARET_EQUAL:
17694
- case PM_TOKEN_EQUAL:
17695
- case PM_TOKEN_GREATER_GREATER_EQUAL:
17696
- case PM_TOKEN_LESS_LESS_EQUAL:
17697
- case PM_TOKEN_MINUS_EQUAL:
17698
- case PM_TOKEN_PARENTHESIS_RIGHT:
17699
- case PM_TOKEN_PERCENT_EQUAL:
17700
- case PM_TOKEN_PIPE_EQUAL:
17701
- case PM_TOKEN_PIPE_PIPE_EQUAL:
17702
- case PM_TOKEN_PLUS_EQUAL:
17703
- case PM_TOKEN_SLASH_EQUAL:
17704
- case PM_TOKEN_STAR_EQUAL:
17705
- case PM_TOKEN_STAR_STAR_EQUAL:
17706
- break;
17707
- default:
17708
- // Once we know it's neither a method call nor an
17709
- // assignment, we can finally create `it` default
17710
- // parameter.
17711
- node = pm_node_check_it(parser, node);
17712
- }
17713
17957
  }
17714
17958
 
17715
17959
  return node;
@@ -17970,6 +18214,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17970
18214
  // as frozen because when clause strings are frozen.
17971
18215
  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17972
18216
  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18217
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18218
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17973
18219
  }
17974
18220
 
17975
18221
  pm_when_clause_static_literals_add(parser, &literals, condition);
@@ -18375,7 +18621,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18375
18621
 
18376
18622
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18377
18623
  receiver = parse_variable_call(parser);
18378
- receiver = pm_node_check_it(parser, receiver);
18379
18624
 
18380
18625
  pm_parser_scope_push(parser, true);
18381
18626
  lex_state_set(parser, PM_LEX_STATE_FNAME);
@@ -18712,7 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18712
18957
  if (match1(parser, PM_TOKEN_COMMA)) {
18713
18958
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18714
18959
  } else {
18715
- index = parse_target(parser, index, false);
18960
+ index = parse_target(parser, index, false, false);
18716
18961
  }
18717
18962
 
18718
18963
  context_pop(parser);
@@ -19347,13 +19592,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19347
19592
  bool ascii_only = parser->current_regular_expression_ascii_only;
19348
19593
  parser_lex(parser);
19349
19594
 
19350
- // If we hit an end, then we can create a regular expression node
19351
- // without interpolation, which can be represented more succinctly and
19352
- // more easily compiled.
19595
+ // If we hit an end, then we can create a regular expression
19596
+ // node without interpolation, which can be represented more
19597
+ // succinctly and more easily compiled.
19353
19598
  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19354
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19355
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
19356
- return node;
19599
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19600
+
19601
+ // If we're not immediately followed by a =~, then we want
19602
+ // to parse all of the errors at this point. If it is
19603
+ // followed by a =~, then it will get parsed higher up while
19604
+ // parsing the named captures as well.
19605
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19606
+ parse_regular_expression_errors(parser, node);
19607
+ }
19608
+
19609
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
19610
+ return (pm_node_t *) node;
19357
19611
  }
19358
19612
 
19359
19613
  // If we get here, then we have interpolation so we'll need to create
@@ -19571,9 +19825,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19571
19825
 
19572
19826
  switch (parser->current.type) {
19573
19827
  case PM_TOKEN_PARENTHESIS_LEFT: {
19574
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19575
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19576
-
19577
19828
  pm_token_t opening = parser->current;
19578
19829
  parser_lex(parser);
19579
19830
 
@@ -19590,9 +19841,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19590
19841
  break;
19591
19842
  }
19592
19843
  case PM_CASE_PARAMETER: {
19593
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
19594
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
19595
-
19596
19844
  pm_accepts_block_stack_push(parser, false);
19597
19845
  pm_token_t opening = not_provided(parser);
19598
19846
  block_parameters = parse_block_parameters(parser, false, &opening, true);
@@ -19845,89 +20093,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19845
20093
  }
19846
20094
 
19847
20095
  /**
19848
- * Potentially change a =~ with a regular expression with named captures into a
19849
- * match write node.
20096
+ * This struct is used to pass information between the regular expression parser
20097
+ * and the named capture callback.
19850
20098
  */
19851
- static pm_node_t *
19852
- parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
19853
- pm_string_list_t named_captures = { 0 };
19854
- pm_node_t *result;
20099
+ typedef struct {
20100
+ /** The parser that is parsing the regular expression. */
20101
+ pm_parser_t *parser;
19855
20102
 
19856
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
19857
- // Since we should not create a MatchWriteNode when all capture names
19858
- // are invalid, creating a MatchWriteNode is delaid here.
19859
- pm_match_write_node_t *match = NULL;
19860
- pm_constant_id_list_t names = { 0 };
20103
+ /** The call node wrapping the regular expression node. */
20104
+ pm_call_node_t *call;
19861
20105
 
19862
- for (size_t index = 0; index < named_captures.length; index++) {
19863
- pm_string_t *string = &named_captures.strings[index];
20106
+ /** The match write node that is being created. */
20107
+ pm_match_write_node_t *match;
19864
20108
 
19865
- const uint8_t *source = pm_string_source(string);
19866
- size_t length = pm_string_length(string);
20109
+ /** The list of names that have been parsed. */
20110
+ pm_constant_id_list_t names;
19867
20111
 
19868
- pm_location_t location;
19869
- pm_constant_id_t name;
20112
+ /**
20113
+ * Whether the content of the regular expression is shared. This impacts
20114
+ * whether or not we used owned constants or shared constants in the
20115
+ * constant pool for the names of the captures.
20116
+ */
20117
+ bool shared;
20118
+ } parse_regular_expression_named_capture_data_t;
19870
20119
 
19871
- // If the name of the capture group isn't a valid identifier, we do
19872
- // not add it to the local table.
19873
- if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
20120
+ /**
20121
+ * This callback is called when the regular expression parser encounters a named
20122
+ * capture group.
20123
+ */
20124
+ static void
20125
+ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20126
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
19874
20127
 
19875
- if (content->type == PM_STRING_SHARED) {
19876
- // If the unescaped string is a slice of the source, then we can
19877
- // copy the names directly. The pointers will line up.
19878
- location = (pm_location_t) { .start = source, .end = source + length };
19879
- name = pm_parser_constant_id_location(parser, location.start, location.end);
19880
- } else {
19881
- // Otherwise, the name is a slice of the malloc-ed owned string,
19882
- // in which case we need to copy it out into a new string.
19883
- location = call->receiver->location;
20128
+ pm_parser_t *parser = callback_data->parser;
20129
+ pm_call_node_t *call = callback_data->call;
20130
+ pm_constant_id_list_t *names = &callback_data->names;
19884
20131
 
19885
- void *memory = xmalloc(length);
19886
- if (memory == NULL) abort();
20132
+ const uint8_t *source = pm_string_source(capture);
20133
+ size_t length = pm_string_length(capture);
19887
20134
 
19888
- memcpy(memory, source, length);
19889
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
19890
- }
20135
+ pm_location_t location;
20136
+ pm_constant_id_t name;
19891
20137
 
19892
- if (name != 0) {
19893
- // We dont want to create duplicate targets if the capture name
19894
- // is duplicated.
19895
- if (pm_constant_id_list_includes(&names, name)) continue;
19896
- pm_constant_id_list_append(&names, name);
20138
+ // If the name of the capture group isn't a valid identifier, we do
20139
+ // not add it to the local table.
20140
+ if (!pm_slice_is_valid_local(parser, source, source + length)) return;
19897
20141
 
19898
- int depth;
19899
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
19900
- // If the identifier is not already a local, then we'll add
19901
- // it to the local table unless it's a keyword.
19902
- if (pm_local_is_keyword((const char *) source, length)) continue;
20142
+ if (callback_data->shared) {
20143
+ // If the unescaped string is a slice of the source, then we can
20144
+ // copy the names directly. The pointers will line up.
20145
+ location = (pm_location_t) { .start = source, .end = source + length };
20146
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
20147
+ } else {
20148
+ // Otherwise, the name is a slice of the malloc-ed owned string,
20149
+ // in which case we need to copy it out into a new string.
20150
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
19903
20151
 
19904
- pm_parser_local_add(parser, name, location.start, location.end, 0);
19905
- }
20152
+ void *memory = xmalloc(length);
20153
+ if (memory == NULL) abort();
19906
20154
 
19907
- // Here we lazily create the MatchWriteNode since we know we're
19908
- // about to add a target.
19909
- if (match == NULL) match = pm_match_write_node_create(parser, call);
20155
+ memcpy(memory, source, length);
20156
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20157
+ }
19910
20158
 
19911
- // Next, create the local variable target and add it to the
19912
- // list of targets for the match.
19913
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
19914
- pm_node_list_append(&match->targets, target);
19915
- }
20159
+ // Add this name to the list of constants if it is valid, not duplicated,
20160
+ // and not a keyword.
20161
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20162
+ pm_constant_id_list_append(names, name);
20163
+
20164
+ int depth;
20165
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20166
+ // If the local is not already a local but it is a keyword, then we
20167
+ // do not want to add a capture for this.
20168
+ if (pm_local_is_keyword((const char *) source, length)) return;
20169
+
20170
+ // If the identifier is not already a local, then we will add it to
20171
+ // the local table.
20172
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
19916
20173
  }
19917
20174
 
19918
- if (match != NULL) {
19919
- result = (pm_node_t *) match;
19920
- } else {
19921
- result = (pm_node_t *) call;
20175
+ // Here we lazily create the MatchWriteNode since we know we're
20176
+ // about to add a target.
20177
+ if (callback_data->match == NULL) {
20178
+ callback_data->match = pm_match_write_node_create(parser, call);
19922
20179
  }
19923
20180
 
19924
- pm_constant_id_list_free(&names);
19925
- } else {
19926
- result = (pm_node_t *) call;
20181
+ // Next, create the local variable target and add it to the list of
20182
+ // targets for the match.
20183
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20184
+ pm_node_list_append(&callback_data->match->targets, target);
19927
20185
  }
20186
+ }
19928
20187
 
19929
- pm_string_list_free(&named_captures);
19930
- return result;
20188
+ /**
20189
+ * Potentially change a =~ with a regular expression with named captures into a
20190
+ * match write node.
20191
+ */
20192
+ static pm_node_t *
20193
+ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
20194
+ parse_regular_expression_named_capture_data_t callback_data = {
20195
+ .parser = parser,
20196
+ .call = call,
20197
+ .names = { 0 },
20198
+ .shared = content->type == PM_STRING_SHARED
20199
+ };
20200
+
20201
+ parse_regular_expression_error_data_t error_data = {
20202
+ .parser = parser,
20203
+ .start = call->receiver->location.start,
20204
+ .end = call->receiver->location.end,
20205
+ .shared = content->type == PM_STRING_SHARED
20206
+ };
20207
+
20208
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20209
+ pm_constant_id_list_free(&callback_data.names);
20210
+
20211
+ if (callback_data.match != NULL) {
20212
+ return (pm_node_t *) callback_data.match;
20213
+ } else {
20214
+ return (pm_node_t *) call;
20215
+ }
19931
20216
  }
19932
20217
 
19933
20218
  static inline pm_node_t *
@@ -20044,7 +20329,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20044
20329
  return result;
20045
20330
  }
20046
20331
  case PM_CALL_NODE: {
20047
- parser_lex(parser);
20048
20332
  pm_call_node_t *cast = (pm_call_node_t *) node;
20049
20333
 
20050
20334
  // If we have a vcall (a method with no arguments and no
@@ -20055,6 +20339,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20055
20339
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20056
20340
 
20057
20341
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20342
+ parser_lex(parser);
20343
+
20058
20344
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20059
20345
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20060
20346
 
@@ -20062,6 +20348,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20062
20348
  return result;
20063
20349
  }
20064
20350
 
20351
+ // Move past the token here so that we have already added
20352
+ // the local variable by this point.
20353
+ parser_lex(parser);
20354
+
20065
20355
  // If there is no call operator and the message is "[]" then
20066
20356
  // this is an aref expression, and we can transform it into
20067
20357
  // an aset expression.
@@ -20157,7 +20447,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20157
20447
  return result;
20158
20448
  }
20159
20449
  case PM_CALL_NODE: {
20160
- parser_lex(parser);
20161
20450
  pm_call_node_t *cast = (pm_call_node_t *) node;
20162
20451
 
20163
20452
  // If we have a vcall (a method with no arguments and no
@@ -20168,6 +20457,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20168
20457
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20169
20458
 
20170
20459
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20460
+ parser_lex(parser);
20461
+
20171
20462
  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20172
20463
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
20173
20464
 
@@ -20175,6 +20466,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20175
20466
  return result;
20176
20467
  }
20177
20468
 
20469
+ // Move past the token here so that we have already added
20470
+ // the local variable by this point.
20471
+ parser_lex(parser);
20472
+
20178
20473
  // If there is no call operator and the message is "[]" then
20179
20474
  // this is an aref expression, and we can transform it into
20180
20475
  // an aset expression.
@@ -20584,7 +20879,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20584
20879
 
20585
20880
  if (
20586
20881
  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
20587
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
20882
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
20588
20883
  ) {
20589
20884
  // If we have a constant immediately following a '::' operator, then
20590
20885
  // this can either be a constant path or a method call, depending on
@@ -21127,7 +21422,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
21127
21422
 
21128
21423
  // Scopes given from the outside are not allowed to have numbered
21129
21424
  // parameters.
21130
- parser->current_scope->numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED;
21425
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
21131
21426
 
21132
21427
  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
21133
21428
  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -21515,331 +21810,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
21515
21810
  }
21516
21811
 
21517
21812
  #endif
21518
-
21519
- /** An error that is going to be formatted into the output. */
21520
- typedef struct {
21521
- /** A pointer to the diagnostic that was generated during parsing. */
21522
- pm_diagnostic_t *error;
21523
-
21524
- /** The start line of the diagnostic message. */
21525
- int32_t line;
21526
-
21527
- /** The column start of the diagnostic message. */
21528
- uint32_t column_start;
21529
-
21530
- /** The column end of the diagnostic message. */
21531
- uint32_t column_end;
21532
- } pm_error_t;
21533
-
21534
- /** The format that will be used to format the errors into the output. */
21535
- typedef struct {
21536
- /** The prefix that will be used for line numbers. */
21537
- const char *number_prefix;
21538
-
21539
- /** The prefix that will be used for blank lines. */
21540
- const char *blank_prefix;
21541
-
21542
- /** The divider that will be used between sections of source code. */
21543
- const char *divider;
21544
-
21545
- /** The length of the blank prefix. */
21546
- size_t blank_prefix_length;
21547
-
21548
- /** The length of the divider. */
21549
- size_t divider_length;
21550
- } pm_error_format_t;
21551
-
21552
- #define PM_COLOR_GRAY "\033[38;5;102m"
21553
- #define PM_COLOR_RED "\033[1;31m"
21554
- #define PM_COLOR_RESET "\033[m"
21555
-
21556
- static inline pm_error_t *
21557
- pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
21558
- pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
21559
- if (errors == NULL) return NULL;
21560
-
21561
- int32_t start_line = parser->start_line;
21562
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
21563
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
21564
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
21565
-
21566
- // We're going to insert this error into the array in sorted order. We
21567
- // do this by finding the first error that has a line number greater
21568
- // than the current error and then inserting the current error before
21569
- // that one.
21570
- size_t index = 0;
21571
- while (
21572
- (index < error_list->size) &&
21573
- (errors[index].error != NULL) &&
21574
- (
21575
- (errors[index].line < start.line) ||
21576
- ((errors[index].line == start.line) && (errors[index].column_start < start.column))
21577
- )
21578
- ) index++;
21579
-
21580
- // Now we're going to shift all of the errors after this one down one
21581
- // index to make room for the new error.
21582
- if (index + 1 < error_list->size) {
21583
- memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
21584
- }
21585
-
21586
- // Finally, we'll insert the error into the array.
21587
- uint32_t column_end;
21588
- if (start.line == end.line) {
21589
- column_end = end.column;
21590
- } else {
21591
- column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
21592
- }
21593
-
21594
- // Ensure we have at least one column of error.
21595
- if (start.column == column_end) column_end++;
21596
-
21597
- errors[index] = (pm_error_t) {
21598
- .error = error,
21599
- .line = start.line,
21600
- .column_start = start.column,
21601
- .column_end = column_end
21602
- };
21603
- }
21604
-
21605
- return errors;
21606
- }
21607
-
21608
- static inline void
21609
- pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
21610
- int32_t line_delta = line - parser->start_line;
21611
- assert(line_delta >= 0);
21612
-
21613
- size_t index = (size_t) line_delta;
21614
- assert(index < newline_list->size);
21615
-
21616
- const uint8_t *start = &parser->start[newline_list->offsets[index]];
21617
- const uint8_t *end;
21618
-
21619
- if (index >= newline_list->size - 1) {
21620
- end = parser->end;
21621
- } else {
21622
- end = &parser->start[newline_list->offsets[index + 1]];
21623
- }
21624
-
21625
- pm_buffer_append_format(buffer, number_prefix, line);
21626
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
21627
-
21628
- if (end == parser->end && end[-1] != '\n') {
21629
- pm_buffer_append_string(buffer, "\n", 1);
21630
- }
21631
- }
21632
-
21633
- /**
21634
- * Format the errors on the parser into the given buffer.
21635
- */
21636
- PRISM_EXPORTED_FUNCTION void
21637
- pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
21638
- assert(error_list->size != 0);
21639
-
21640
- // First, we're going to sort all of the errors by line number using an
21641
- // insertion sort into a newly allocated array.
21642
- const int32_t start_line = parser->start_line;
21643
- const pm_newline_list_t *newline_list = &parser->newline_list;
21644
-
21645
- pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
21646
- if (errors == NULL) return;
21647
-
21648
- // Now we're going to determine how we're going to format line numbers and
21649
- // blank lines based on the maximum number of digits in the line numbers
21650
- // that are going to be displaid.
21651
- pm_error_format_t error_format;
21652
- int32_t first_line_number = errors[0].line;
21653
- int32_t last_line_number = errors[error_list->size - 1].line;
21654
-
21655
- // If we have a maximum line number that is negative, then we're going to
21656
- // use the absolute value for comparison but multiple by 10 to additionally
21657
- // have a column for the negative sign.
21658
- if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
21659
- if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
21660
- int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
21661
-
21662
- if (max_line_number < 10) {
21663
- if (colorize) {
21664
- error_format = (pm_error_format_t) {
21665
- .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
21666
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21667
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
21668
- };
21669
- } else {
21670
- error_format = (pm_error_format_t) {
21671
- .number_prefix = "%1" PRIi32 " | ",
21672
- .blank_prefix = " | ",
21673
- .divider = " ~~~~~\n"
21674
- };
21675
- }
21676
- } else if (max_line_number < 100) {
21677
- if (colorize) {
21678
- error_format = (pm_error_format_t) {
21679
- .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
21680
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21681
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
21682
- };
21683
- } else {
21684
- error_format = (pm_error_format_t) {
21685
- .number_prefix = "%2" PRIi32 " | ",
21686
- .blank_prefix = " | ",
21687
- .divider = " ~~~~~~\n"
21688
- };
21689
- }
21690
- } else if (max_line_number < 1000) {
21691
- if (colorize) {
21692
- error_format = (pm_error_format_t) {
21693
- .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
21694
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21695
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
21696
- };
21697
- } else {
21698
- error_format = (pm_error_format_t) {
21699
- .number_prefix = "%3" PRIi32 " | ",
21700
- .blank_prefix = " | ",
21701
- .divider = " ~~~~~~~\n"
21702
- };
21703
- }
21704
- } else if (max_line_number < 10000) {
21705
- if (colorize) {
21706
- error_format = (pm_error_format_t) {
21707
- .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
21708
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21709
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21710
- };
21711
- } else {
21712
- error_format = (pm_error_format_t) {
21713
- .number_prefix = "%4" PRIi32 " | ",
21714
- .blank_prefix = " | ",
21715
- .divider = " ~~~~~~~~\n"
21716
- };
21717
- }
21718
- } else {
21719
- if (colorize) {
21720
- error_format = (pm_error_format_t) {
21721
- .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
21722
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
21723
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
21724
- };
21725
- } else {
21726
- error_format = (pm_error_format_t) {
21727
- .number_prefix = "%5" PRIi32 " | ",
21728
- .blank_prefix = " | ",
21729
- .divider = " ~~~~~~~~\n"
21730
- };
21731
- }
21732
- }
21733
-
21734
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
21735
- error_format.divider_length = strlen(error_format.divider);
21736
-
21737
- // Now we're going to iterate through every error in our error list and
21738
- // display it. While we're iterating, we will display some padding lines of
21739
- // the source before the error to give some context. We'll be careful not to
21740
- // display the same line twice in case the errors are close enough in the
21741
- // source.
21742
- int32_t last_line = parser->start_line - 1;
21743
- const pm_encoding_t *encoding = parser->encoding;
21744
-
21745
- for (size_t index = 0; index < error_list->size; index++) {
21746
- pm_error_t *error = &errors[index];
21747
-
21748
- // Here we determine how many lines of padding of the source to display,
21749
- // based on the difference from the last line that was displaid.
21750
- if (error->line - last_line > 1) {
21751
- if (error->line - last_line > 2) {
21752
- if ((index != 0) && (error->line - last_line > 3)) {
21753
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
21754
- }
21755
-
21756
- pm_buffer_append_string(buffer, " ", 2);
21757
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
21758
- }
21759
-
21760
- pm_buffer_append_string(buffer, " ", 2);
21761
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
21762
- }
21763
-
21764
- // If this is the first error or we're on a new line, then we'll display
21765
- // the line that has the error in it.
21766
- if ((index == 0) || (error->line != last_line)) {
21767
- if (colorize) {
21768
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
21769
- } else {
21770
- pm_buffer_append_string(buffer, "> ", 2);
21771
- }
21772
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
21773
- }
21774
-
21775
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
21776
- if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
21777
-
21778
- // Now we'll display the actual error message. We'll do this by first
21779
- // putting the prefix to the line, then a bunch of blank spaces
21780
- // depending on the column, then as many carets as we need to display
21781
- // the width of the error, then the error message itself.
21782
- //
21783
- // Note that this doesn't take into account the width of the actual
21784
- // character when displaid in the terminal. For some east-asian
21785
- // languages or emoji, this means it can be thrown off pretty badly. We
21786
- // will need to solve this eventually.
21787
- pm_buffer_append_string(buffer, " ", 2);
21788
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21789
-
21790
- size_t column = 0;
21791
- while (column < error->column_start) {
21792
- pm_buffer_append_byte(buffer, ' ');
21793
-
21794
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21795
- column += (char_width == 0 ? 1 : char_width);
21796
- }
21797
-
21798
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21799
- pm_buffer_append_byte(buffer, '^');
21800
-
21801
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21802
- column += (char_width == 0 ? 1 : char_width);
21803
-
21804
- while (column < error->column_end) {
21805
- pm_buffer_append_byte(buffer, '~');
21806
-
21807
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21808
- column += (char_width == 0 ? 1 : char_width);
21809
- }
21810
-
21811
- if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21812
-
21813
- if (inline_messages) {
21814
- pm_buffer_append_byte(buffer, ' ');
21815
- assert(error->error != NULL);
21816
-
21817
- const char *message = error->error->message;
21818
- pm_buffer_append_string(buffer, message, strlen(message));
21819
- }
21820
-
21821
- pm_buffer_append_byte(buffer, '\n');
21822
-
21823
- // Here we determine how many lines of padding to display after the
21824
- // error, depending on where the next error is in source.
21825
- last_line = error->line;
21826
- int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
21827
-
21828
- if (next_line - last_line > 1) {
21829
- pm_buffer_append_string(buffer, " ", 2);
21830
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21831
- }
21832
-
21833
- if (next_line - last_line > 1) {
21834
- pm_buffer_append_string(buffer, " ", 2);
21835
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
21836
- }
21837
- }
21838
-
21839
- // Finally, we'll free the array of errors that we allocated.
21840
- xfree(errors);
21841
- }
21842
-
21843
- #undef PM_COLOR_GRAY
21844
- #undef PM_COLOR_RED
21845
- #undef PM_COLOR_RESET