prism 0.18.0 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -1
  3. data/README.md +2 -1
  4. data/config.yml +188 -55
  5. data/docs/building.md +9 -2
  6. data/docs/configuration.md +10 -9
  7. data/docs/encoding.md +24 -56
  8. data/docs/local_variable_depth.md +229 -0
  9. data/docs/ruby_api.md +2 -0
  10. data/docs/serialization.md +18 -13
  11. data/ext/prism/api_node.c +337 -195
  12. data/ext/prism/extconf.rb +13 -7
  13. data/ext/prism/extension.c +96 -32
  14. data/ext/prism/extension.h +1 -1
  15. data/include/prism/ast.h +340 -137
  16. data/include/prism/defines.h +17 -0
  17. data/include/prism/diagnostic.h +11 -5
  18. data/include/prism/encoding.h +248 -0
  19. data/include/prism/options.h +2 -2
  20. data/include/prism/parser.h +62 -42
  21. data/include/prism/regexp.h +2 -2
  22. data/include/prism/util/pm_buffer.h +9 -1
  23. data/include/prism/util/pm_memchr.h +2 -2
  24. data/include/prism/util/pm_strpbrk.h +3 -3
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +13 -15
  27. data/lib/prism/compiler.rb +12 -0
  28. data/lib/prism/debug.rb +9 -4
  29. data/lib/prism/desugar_compiler.rb +3 -3
  30. data/lib/prism/dispatcher.rb +56 -0
  31. data/lib/prism/dot_visitor.rb +476 -198
  32. data/lib/prism/dsl.rb +66 -46
  33. data/lib/prism/ffi.rb +16 -3
  34. data/lib/prism/lex_compat.rb +19 -9
  35. data/lib/prism/mutation_compiler.rb +20 -0
  36. data/lib/prism/node.rb +1173 -450
  37. data/lib/prism/node_ext.rb +41 -16
  38. data/lib/prism/parse_result.rb +12 -15
  39. data/lib/prism/ripper_compat.rb +49 -34
  40. data/lib/prism/serialize.rb +242 -212
  41. data/lib/prism/visitor.rb +12 -0
  42. data/lib/prism.rb +20 -4
  43. data/prism.gemspec +4 -10
  44. data/rbi/prism.rbi +605 -230
  45. data/rbi/prism_static.rbi +3 -0
  46. data/sig/prism.rbs +379 -124
  47. data/sig/prism_static.rbs +1 -0
  48. data/src/diagnostic.c +228 -222
  49. data/src/encoding.c +5137 -0
  50. data/src/node.c +66 -0
  51. data/src/options.c +21 -2
  52. data/src/prettyprint.c +806 -406
  53. data/src/prism.c +1092 -700
  54. data/src/regexp.c +3 -3
  55. data/src/serialize.c +227 -157
  56. data/src/util/pm_buffer.c +10 -1
  57. data/src/util/pm_memchr.c +1 -1
  58. data/src/util/pm_strpbrk.c +4 -4
  59. metadata +5 -11
  60. data/include/prism/enc/pm_encoding.h +0 -227
  61. data/src/enc/pm_big5.c +0 -116
  62. data/src/enc/pm_cp51932.c +0 -57
  63. data/src/enc/pm_euc_jp.c +0 -69
  64. data/src/enc/pm_gbk.c +0 -65
  65. data/src/enc/pm_shift_jis.c +0 -57
  66. data/src/enc/pm_tables.c +0 -2073
  67. data/src/enc/pm_unicode.c +0 -2369
  68. data/src/enc/pm_windows_31j.c +0 -57
data/src/prism.c CHANGED
@@ -40,6 +40,7 @@ debug_context(pm_context_t context) {
40
40
  case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
41
41
  case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
42
42
  case PM_CONTEXT_ENSURE: return "ENSURE";
43
+ case PM_CONTEXT_ENSURE_DEF: return "ENSURE_DEF";
43
44
  case PM_CONTEXT_ELSE: return "ELSE";
44
45
  case PM_CONTEXT_ELSIF: return "ELSIF";
45
46
  case PM_CONTEXT_EMBEXPR: return "EMBEXPR";
@@ -56,6 +57,8 @@ debug_context(pm_context_t context) {
56
57
  case PM_CONTEXT_PREEXE: return "PREEXE";
57
58
  case PM_CONTEXT_RESCUE: return "RESCUE";
58
59
  case PM_CONTEXT_RESCUE_ELSE: return "RESCUE_ELSE";
60
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "RESCUE_ELSE_DEF";
61
+ case PM_CONTEXT_RESCUE_DEF: return "RESCUE_DEF";
59
62
  case PM_CONTEXT_SCLASS: return "SCLASS";
60
63
  case PM_CONTEXT_UNLESS: return "UNLESS";
61
64
  case PM_CONTEXT_UNTIL: return "UNTIL";
@@ -272,6 +275,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
272
275
  breakpoints[index++] = incrementor;
273
276
  }
274
277
 
278
+ parser->explicit_encoding = NULL;
275
279
  return lex_mode_push(parser, lex_mode);
276
280
  }
277
281
 
@@ -353,6 +357,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
353
357
  breakpoints[index++] = incrementor;
354
358
  }
355
359
 
360
+ parser->explicit_encoding = NULL;
356
361
  return lex_mode_push(parser, lex_mode);
357
362
  }
358
363
 
@@ -536,7 +541,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
536
541
  * Append an error to the list of errors on the parser using the location of the
537
542
  * given token and a format string.
538
543
  */
539
- #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, token->start, token->end, diag_id, __VA_ARGS__)
544
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
540
545
 
541
546
  /**
542
547
  * Append a warning to the list of warnings on the parser.
@@ -776,8 +781,7 @@ pm_conditional_predicate(pm_node_t *node) {
776
781
  * parentheses. In these cases we set the token to the "not provided" type. For
777
782
  * example:
778
783
  *
779
- * pm_token_t token;
780
- * not_provided(&token, parser->previous.end);
784
+ * pm_token_t token = not_provided(parser);
781
785
  */
782
786
  static inline pm_token_t
783
787
  not_provided(pm_parser_t *parser) {
@@ -860,6 +864,27 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
860
864
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
861
865
  }
862
866
 
867
+ /******************************************************************************/
868
+ /* Node flag handling functions */
869
+ /******************************************************************************/
870
+
871
+ /**
872
+ * Set the given flag on the given node.
873
+ */
874
+ static inline void
875
+ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
876
+ node->flags |= flag;
877
+ }
878
+
879
+ /**
880
+ * Remove the given flag from the given node.
881
+ */
882
+ static inline void
883
+ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
+ node->flags &= (pm_node_flags_t) ~flag;
885
+ }
886
+
887
+
863
888
  /******************************************************************************/
864
889
  /* Node creation functions */
865
890
  /******************************************************************************/
@@ -1148,8 +1173,12 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
1148
1173
 
1149
1174
  // If the element is not a static literal, then the array is not a static
1150
1175
  // literal. Turn that flag off.
1151
- if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
1152
- node->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
1176
+ if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
1177
+ pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
1178
+ }
1179
+
1180
+ if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
1181
+ pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
1153
1182
  }
1154
1183
  }
1155
1184
 
@@ -1193,7 +1222,7 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
1193
1222
  for (size_t index = 0; index < nodes->size; index++) {
1194
1223
  pm_node_t *child = nodes->nodes[index];
1195
1224
 
1196
- if (!found_rest && PM_NODE_TYPE_P(child, PM_SPLAT_NODE)) {
1225
+ if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
1197
1226
  node->rest = child;
1198
1227
  found_rest = true;
1199
1228
  } else if (found_rest) {
@@ -1461,7 +1490,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1461
1490
  * Allocate and initialize a new BlockNode node.
1462
1491
  */
1463
1492
  static pm_block_node_t *
1464
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_block_parameters_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1493
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1465
1494
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1466
1495
 
1467
1496
  *node = (pm_block_node_t) {
@@ -1470,6 +1499,7 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
1470
1499
  .location = { .start = opening->start, .end = closing->end },
1471
1500
  },
1472
1501
  .locals = *locals,
1502
+ .locals_body_index = locals_body_index,
1473
1503
  .parameters = parameters,
1474
1504
  .body = body,
1475
1505
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1711,7 +1741,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1711
1741
  node->block = arguments->block;
1712
1742
 
1713
1743
  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1714
- node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
1744
+ pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1715
1745
  }
1716
1746
 
1717
1747
  node->name = pm_parser_constant_id_token(parser, message);
@@ -1785,7 +1815,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
1785
1815
  node->block = arguments->block;
1786
1816
 
1787
1817
  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1788
- node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
1818
+ pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1789
1819
  }
1790
1820
 
1791
1821
  node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -1832,12 +1862,12 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1832
1862
  */
1833
1863
  static inline bool
1834
1864
  pm_call_node_variable_call_p(pm_call_node_t *node) {
1835
- return node->base.flags & PM_CALL_NODE_FLAGS_VARIABLE_CALL;
1865
+ return PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
1836
1866
  }
1837
1867
 
1838
1868
  /**
1839
- * Returns whether or not this call is to the [] method in the index form (as
1840
- * opposed to `foo.[]`).
1869
+ * Returns whether or not this call is to the [] method in the index form without a block (as
1870
+ * opposed to `foo.[]` and `foo[] { }`).
1841
1871
  */
1842
1872
  static inline bool
1843
1873
  pm_call_node_index_p(pm_call_node_t *node) {
@@ -1845,7 +1875,8 @@ pm_call_node_index_p(pm_call_node_t *node) {
1845
1875
  (node->call_operator_loc.start == NULL) &&
1846
1876
  (node->message_loc.start != NULL) &&
1847
1877
  (node->message_loc.start[0] == '[') &&
1848
- (node->message_loc.end[-1] == ']')
1878
+ (node->message_loc.end[-1] == ']') &&
1879
+ (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE))
1849
1880
  );
1850
1881
  }
1851
1882
 
@@ -2101,6 +2132,63 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2101
2132
  return node;
2102
2133
  }
2103
2134
 
2135
+ /**
2136
+ * Allocate and initialize a new CallTargetNode node from an existing call
2137
+ * node.
2138
+ */
2139
+ static pm_call_target_node_t *
2140
+ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2141
+ pm_call_target_node_t *node = PM_ALLOC_NODE(parser, pm_call_target_node_t);
2142
+
2143
+ *node = (pm_call_target_node_t) {
2144
+ {
2145
+ .type = PM_CALL_TARGET_NODE,
2146
+ .flags = target->base.flags,
2147
+ .location = target->base.location
2148
+ },
2149
+ .receiver = target->receiver,
2150
+ .call_operator_loc = target->call_operator_loc,
2151
+ .name = target->name,
2152
+ .message_loc = target->message_loc
2153
+ };
2154
+
2155
+ // Here we're going to free the target, since it is no longer necessary.
2156
+ // However, we don't want to call `pm_node_destroy` because we want to keep
2157
+ // around all of its children since we just reused them.
2158
+ free(target);
2159
+
2160
+ return node;
2161
+ }
2162
+
2163
+ /**
2164
+ * Allocate and initialize a new IndexTargetNode node from an existing call
2165
+ * node.
2166
+ */
2167
+ static pm_index_target_node_t *
2168
+ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
+ pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2170
+
2171
+ *node = (pm_index_target_node_t) {
2172
+ {
2173
+ .type = PM_INDEX_TARGET_NODE,
2174
+ .flags = target->base.flags,
2175
+ .location = target->base.location
2176
+ },
2177
+ .receiver = target->receiver,
2178
+ .opening_loc = target->opening_loc,
2179
+ .arguments = target->arguments,
2180
+ .closing_loc = target->closing_loc,
2181
+ .block = target->block
2182
+ };
2183
+
2184
+ // Here we're going to free the target, since it is no longer necessary.
2185
+ // However, we don't want to call `pm_node_destroy` because we want to keep
2186
+ // around all of its children since we just reused them.
2187
+ free(target);
2188
+
2189
+ return node;
2190
+ }
2191
+
2104
2192
  /**
2105
2193
  * Allocate and initialize a new CapturePatternNode node.
2106
2194
  */
@@ -2452,6 +2540,8 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
2452
2540
  */
2453
2541
  static pm_constant_path_node_t *
2454
2542
  pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
2543
+ pm_assert_value_expression(parser, parent);
2544
+
2455
2545
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
2456
2546
 
2457
2547
  *node = (pm_constant_path_node_t) {
@@ -2622,6 +2712,7 @@ pm_def_node_create(
2622
2712
  pm_parameters_node_t *parameters,
2623
2713
  pm_node_t *body,
2624
2714
  pm_constant_id_list_t *locals,
2715
+ uint32_t locals_body_index,
2625
2716
  const pm_token_t *def_keyword,
2626
2717
  const pm_token_t *operator,
2627
2718
  const pm_token_t *lparen,
@@ -2649,6 +2740,7 @@ pm_def_node_create(
2649
2740
  .parameters = parameters,
2650
2741
  .body = body,
2651
2742
  .locals = *locals,
2743
+ .locals_body_index = locals_body_index,
2652
2744
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2653
2745
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2654
2746
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3256,10 +3348,16 @@ static inline void
3256
3348
  pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
3257
3349
  pm_node_list_append(&hash->elements, element);
3258
3350
 
3259
- // If the element is not a static literal, then the hash is not a static
3260
- // literal. Turn that flag off.
3261
- if ((element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
3262
- hash->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
3351
+ bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
3352
+ if (static_literal) {
3353
+ pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
3354
+ static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
3355
+ static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
3356
+ static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
3357
+ }
3358
+
3359
+ if (!static_literal) {
3360
+ pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
3263
3361
  }
3264
3362
  }
3265
3363
 
@@ -3416,6 +3514,25 @@ pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
3416
3514
  return node;
3417
3515
  }
3418
3516
 
3517
+ /**
3518
+ * Allocate and initialize a new ImplicitRestNode node.
3519
+ */
3520
+ static pm_implicit_rest_node_t *
3521
+ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
3522
+ assert(token->type == PM_TOKEN_COMMA);
3523
+
3524
+ pm_implicit_rest_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_rest_node_t);
3525
+
3526
+ *node = (pm_implicit_rest_node_t) {
3527
+ {
3528
+ .type = PM_IMPLICIT_REST_NODE,
3529
+ .location = PM_LOCATION_TOKEN_VALUE(token)
3530
+ }
3531
+ };
3532
+
3533
+ return node;
3534
+ }
3535
+
3419
3536
  /**
3420
3537
  * Allocate and initialize a new IntegerNode node.
3421
3538
  */
@@ -3697,7 +3814,7 @@ static inline void
3697
3814
  pm_interpolated_regular_expression_node_closing_set(pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
3698
3815
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
3699
3816
  node->base.location.end = closing->end;
3700
- node->base.flags |= pm_regular_expression_flags_create(closing);
3817
+ pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(closing));
3701
3818
  }
3702
3819
 
3703
3820
  /**
@@ -3831,7 +3948,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3831
3948
  *node = (pm_keyword_hash_node_t) {
3832
3949
  .base = {
3833
3950
  .type = PM_KEYWORD_HASH_NODE,
3834
- .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3951
+ .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3952
+ .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS
3835
3953
  },
3836
3954
  .elements = { 0 }
3837
3955
  };
@@ -3844,6 +3962,13 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3844
3962
  */
3845
3963
  static void
3846
3964
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
+ // If the element being added is not an AssocNode or does not have a symbol key, then
3966
+ // we want to turn the SYMBOL_KEYS flag off.
3967
+ // (The constant used below is already named SYMBOL_KEYS, so no rename is pending here.)
3968
+ if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
+ pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
+ }
3971
+
3847
3972
  pm_node_list_append(&hash->elements, element);
3848
3973
  if (hash->base.location.start == NULL) {
3849
3974
  hash->base.location.start = element->location.start;
@@ -3926,10 +4051,11 @@ static pm_lambda_node_t *
3926
4051
  pm_lambda_node_create(
3927
4052
  pm_parser_t *parser,
3928
4053
  pm_constant_id_list_t *locals,
4054
+ uint32_t locals_body_index,
3929
4055
  const pm_token_t *operator,
3930
4056
  const pm_token_t *opening,
3931
4057
  const pm_token_t *closing,
3932
- pm_block_parameters_node_t *parameters,
4058
+ pm_node_t *parameters,
3933
4059
  pm_node_t *body
3934
4060
  ) {
3935
4061
  pm_lambda_node_t *node = PM_ALLOC_NODE(parser, pm_lambda_node_t);
@@ -3943,6 +4069,7 @@ pm_lambda_node_create(
3943
4069
  },
3944
4070
  },
3945
4071
  .locals = *locals,
4072
+ .locals_body_index = locals_body_index,
3946
4073
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3947
4074
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3948
4075
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4038,6 +4165,12 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4038
4165
  */
4039
4166
  static pm_local_variable_read_node_t *
4040
4167
  pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
+
4170
+ if (parser->current_param_name == name_id) {
4171
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
+ }
4173
+
4041
4174
  pm_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_read_node_t);
4042
4175
 
4043
4176
  *node = (pm_local_variable_read_node_t) {
@@ -4045,7 +4178,7 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4045
4178
  .type = PM_LOCAL_VARIABLE_READ_NODE,
4046
4179
  .location = PM_LOCATION_TOKEN_VALUE(name)
4047
4180
  },
4048
- .name = pm_parser_constant_id_token(parser, name),
4181
+ .name = name_id,
4049
4182
  .depth = depth
4050
4183
  };
4051
4184
 
@@ -4132,6 +4265,21 @@ pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name
4132
4265
  );
4133
4266
  }
4134
4267
 
4268
+ /**
4269
+ * Allocate and initialize a new LocalVariableTargetNode node with the given depth.
4270
+ */
4271
+ static pm_local_variable_target_node_t *
4272
+ pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4273
+ pm_refute_numbered_parameter(parser, name->start, name->end);
4274
+
4275
+ return pm_local_variable_target_node_create_values(
4276
+ parser,
4277
+ &(pm_location_t) { .start = name->start, .end = name->end },
4278
+ pm_parser_constant_id_token(parser, name),
4279
+ depth
4280
+ );
4281
+ }
4282
+
4135
4283
  /**
4136
4284
  * Allocate and initialize a new MatchPredicateNode node.
4137
4285
  */
@@ -4254,7 +4402,7 @@ pm_multi_target_node_create(pm_parser_t *parser) {
4254
4402
  */
4255
4403
  static void
4256
4404
  pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4257
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
4405
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4258
4406
  if (node->rest == NULL) {
4259
4407
  node->rest = target;
4260
4408
  } else {
@@ -4390,7 +4538,25 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper
4390
4538
  }
4391
4539
 
4392
4540
  /**
4393
- * Allocate a new NthReferenceReadNode node.
4541
+ * Allocate and initialize a new NumberedParametersNode node.
4542
+ */
4543
+ static pm_numbered_parameters_node_t *
4544
+ pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
4545
+ pm_numbered_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_numbered_parameters_node_t);
4546
+
4547
+ *node = (pm_numbered_parameters_node_t) {
4548
+ {
4549
+ .type = PM_NUMBERED_PARAMETERS_NODE,
4550
+ .location = *location
4551
+ },
4552
+ .maximum = maximum
4553
+ };
4554
+
4555
+ return node;
4556
+ }
4557
+
4558
+ /**
4559
+ * Allocate and initialize a new NumberedReferenceReadNode node.
4394
4560
  */
4395
4561
  static pm_numbered_reference_read_node_t *
4396
4562
  pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
@@ -4530,9 +4696,8 @@ pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param)
4530
4696
  * Set the rest parameter on a ParametersNode node.
4531
4697
  */
4532
4698
  static void
4533
- pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_rest_parameter_node_t *param) {
4534
- assert(params->rest == NULL);
4535
- pm_parameters_node_location_set(params, (pm_node_t *) param);
4699
+ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
4700
+ pm_parameters_node_location_set(params, param);
4536
4701
  params->rest = param;
4537
4702
  }
4538
4703
 
@@ -5124,7 +5289,7 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
5124
5289
  pm_node_list_append(&node->body, statement);
5125
5290
 
5126
5291
  // Every statement gets marked as a place where a newline can occur.
5127
- statement->flags |= PM_NODE_FLAG_NEWLINE;
5292
+ pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
5128
5293
  }
5129
5294
 
5130
5295
  /**
@@ -5643,6 +5808,7 @@ pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5643
5808
  *node = (pm_x_string_node_t) {
5644
5809
  {
5645
5810
  .type = PM_X_STRING_NODE,
5811
+ .flags = PM_STRING_FLAGS_FROZEN,
5646
5812
  .location = {
5647
5813
  .start = opening->start,
5648
5814
  .end = closing->end
@@ -5718,8 +5884,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5718
5884
  .previous = parser->current_scope,
5719
5885
  .closed = closed,
5720
5886
  .explicit_params = false,
5721
- .numbered_params = false,
5722
- .transparent = false
5887
+ .numbered_parameters = 0,
5723
5888
  };
5724
5889
 
5725
5890
  pm_constant_id_list_init(&scope->locals);
@@ -5728,27 +5893,6 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5728
5893
  return true;
5729
5894
  }
5730
5895
 
5731
- /**
5732
- * Allocate and initialize a new scope. Push it onto the scope stack.
5733
- */
5734
- static bool
5735
- pm_parser_scope_push_transparent(pm_parser_t *parser) {
5736
- pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
5737
- if (scope == NULL) return false;
5738
-
5739
- *scope = (pm_scope_t) {
5740
- .previous = parser->current_scope,
5741
- .closed = false,
5742
- .explicit_params = false,
5743
- .numbered_params = false,
5744
- .transparent = true
5745
- };
5746
-
5747
- parser->current_scope = scope;
5748
-
5749
- return true;
5750
- }
5751
-
5752
5896
  /**
5753
5897
  * Check if any of the currently visible scopes contain a local variable
5754
5898
  * described by the given constant id.
@@ -5759,7 +5903,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
5759
5903
  int depth = 0;
5760
5904
 
5761
5905
  while (scope != NULL) {
5762
- if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5906
+ if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5763
5907
  if (scope->closed) break;
5764
5908
 
5765
5909
  scope = scope->previous;
@@ -5784,15 +5928,19 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5784
5928
  */
5785
5929
  static inline void
5786
5930
  pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
5787
- pm_scope_t *scope = parser->current_scope;
5788
- while (scope && scope->transparent) scope = scope->previous;
5789
-
5790
- assert(scope != NULL);
5791
- if (!pm_constant_id_list_includes(&scope->locals, constant_id)) {
5792
- pm_constant_id_list_append(&scope->locals, constant_id);
5931
+ if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5932
+ pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
5793
5933
  }
5794
5934
  }
5795
5935
 
5936
+ /**
5937
+ * Set the numbered_parameters value of the current scope.
5938
+ */
5939
+ static inline void
5940
+ pm_parser_numbered_parameters_set(pm_parser_t *parser, uint8_t numbered_parameters) {
5941
+ parser->current_scope->numbered_parameters = numbered_parameters;
5942
+ }
5943
+
5796
5944
  /**
5797
5945
  * Add a local variable from a location to the current scope.
5798
5946
  */
@@ -5869,12 +6017,12 @@ static inline size_t
5869
6017
  char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5870
6018
  if (parser->encoding_changed) {
5871
6019
  size_t width;
5872
- if ((width = parser->encoding.alpha_char(b, parser->end - b)) != 0) {
6020
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
5873
6021
  return width;
5874
6022
  } else if (*b == '_') {
5875
6023
  return 1;
5876
6024
  } else if (*b >= 0x80) {
5877
- return parser->encoding.char_width(b, parser->end - b);
6025
+ return parser->encoding->char_width(b, parser->end - b);
5878
6026
  } else {
5879
6027
  return 0;
5880
6028
  }
@@ -5885,6 +6033,19 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5885
6033
  }
5886
6034
  }
5887
6035
 
6036
+ /**
6037
+ * Similar to char_is_identifier but this function assumes that the encoding
6038
+ * has not been changed.
6039
+ */
6040
+ static inline size_t
6041
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
+ if (*b < 0x80) {
6043
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
+ } else {
6045
+ return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6046
+ }
6047
+ }
6048
+
5888
6049
  /**
5889
6050
  * Like the above, this function is also used extremely frequently to lex all of
5890
6051
  * the identifiers in a source file once the first character has been found. So
@@ -5894,20 +6055,17 @@ static inline size_t
5894
6055
  char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
5895
6056
  if (parser->encoding_changed) {
5896
6057
  size_t width;
5897
- if ((width = parser->encoding.alnum_char(b, parser->end - b)) != 0) {
6058
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
5898
6059
  return width;
5899
6060
  } else if (*b == '_') {
5900
6061
  return 1;
5901
6062
  } else if (*b >= 0x80) {
5902
- return parser->encoding.char_width(b, parser->end - b);
6063
+ return parser->encoding->char_width(b, parser->end - b);
5903
6064
  } else {
5904
6065
  return 0;
5905
6066
  }
5906
- } else if (*b < 0x80) {
5907
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
5908
- } else {
5909
- return (size_t) (pm_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
5910
6067
  }
6068
+ return char_is_identifier_utf8(b, parser->end);
5911
6069
  }
5912
6070
 
5913
6071
  // Here we're defining a perfect hash for the characters that are allowed in
@@ -6082,195 +6240,18 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
6082
6240
  */
6083
6241
  static bool
6084
6242
  parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6085
- size_t width = (size_t) (end - start);
6086
-
6087
- // First, we're going to call out to a user-defined callback if one was
6088
- // provided. If they return an encoding struct that we can use, then we'll
6089
- // use that here.
6090
- if (parser->encoding_decode_callback != NULL) {
6091
- pm_encoding_t *encoding = parser->encoding_decode_callback(parser, start, width);
6243
+ const pm_encoding_t *encoding = pm_encoding_find(start, end);
6092
6244
 
6093
- if (encoding != NULL) {
6094
- parser->encoding = *encoding;
6095
- return true;
6096
- }
6097
- }
6098
-
6099
- // Next, we're going to check for UTF-8. This is the most common encoding.
6100
- // utf-8 can contain extra information at the end about the platform it is
6101
- // encoded on, such as utf-8-mac or utf-8-unix. We'll ignore those suffixes.
6102
- if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
6103
- // We need to explicitly handle utf-8-hfs, as that one needs to switch
6104
- // over to being utf8-mac.
6105
- if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-hfs", 4) == 0)) {
6106
- parser->encoding = pm_encoding_utf8_mac;
6245
+ if (encoding != NULL) {
6246
+ if (encoding != PM_ENCODING_UTF_8_ENTRY) {
6247
+ parser->encoding = encoding;
6107
6248
  parser->encoding_changed = true;
6108
6249
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
6109
- return true;
6110
6250
  }
6111
6251
 
6112
- // We don't need to do anything here because the default encoding is
6113
- // already UTF-8. We'll just return.
6114
6252
  return true;
6115
6253
  }
6116
6254
 
6117
- // Next, we're going to loop through each of the encodings that we handle
6118
- // explicitly. If we found one that we understand, we'll use that value.
6119
- #define ENCODING1(value, prebuilt) \
6120
- if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
6121
- parser->encoding = prebuilt; \
6122
- parser->encoding_changed = true; \
6123
- if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
6124
- return true; \
6125
- }
6126
-
6127
- // A convenience macros for comparing two aliases for the same encoding.
6128
- #define ENCODING2(value1, value2, prebuilt) ENCODING1(value1, prebuilt) ENCODING1(value2, prebuilt)
6129
-
6130
- if (width >= 3) {
6131
- switch (*start) {
6132
- case 'A': case 'a':
6133
- ENCODING1("ASCII", pm_encoding_ascii);
6134
- ENCODING1("ASCII-8BIT", pm_encoding_ascii_8bit);
6135
- ENCODING1("ANSI_X3.4-1968", pm_encoding_ascii);
6136
- break;
6137
- case 'B': case 'b':
6138
- ENCODING1("BINARY", pm_encoding_ascii_8bit);
6139
- ENCODING1("Big5", pm_encoding_big5);
6140
- ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
6141
- ENCODING1("Big5-UAO", pm_encoding_big5_uao);
6142
- break;
6143
- case 'C': case 'c':
6144
- ENCODING1("CP437", pm_encoding_ibm437);
6145
- ENCODING1("CP720", pm_encoding_ibm720);
6146
- ENCODING1("CP737", pm_encoding_ibm737);
6147
- ENCODING1("CP775", pm_encoding_ibm775);
6148
- ENCODING1("CP850", pm_encoding_cp850);
6149
- ENCODING1("CP852", pm_encoding_cp852);
6150
- ENCODING1("CP855", pm_encoding_cp855);
6151
- ENCODING1("CP857", pm_encoding_ibm857);
6152
- ENCODING1("CP860", pm_encoding_ibm860);
6153
- ENCODING1("CP861", pm_encoding_ibm861);
6154
- ENCODING1("CP862", pm_encoding_ibm862);
6155
- ENCODING1("CP864", pm_encoding_ibm864);
6156
- ENCODING1("CP865", pm_encoding_ibm865);
6157
- ENCODING1("CP866", pm_encoding_ibm866);
6158
- ENCODING1("CP869", pm_encoding_ibm869);
6159
- ENCODING1("CP874", pm_encoding_windows_874);
6160
- ENCODING1("CP878", pm_encoding_koi8_r);
6161
- ENCODING1("CP863", pm_encoding_ibm863);
6162
- ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
6163
- ENCODING1("CP936", pm_encoding_gbk);
6164
- ENCODING1("CP1250", pm_encoding_windows_1250);
6165
- ENCODING1("CP1251", pm_encoding_windows_1251);
6166
- ENCODING1("CP1252", pm_encoding_windows_1252);
6167
- ENCODING1("CP1253", pm_encoding_windows_1253);
6168
- ENCODING1("CP1254", pm_encoding_windows_1254);
6169
- ENCODING1("CP1255", pm_encoding_windows_1255);
6170
- ENCODING1("CP1256", pm_encoding_windows_1256);
6171
- ENCODING1("CP1257", pm_encoding_windows_1257);
6172
- ENCODING1("CP1258", pm_encoding_windows_1258);
6173
- ENCODING1("CP51932", pm_encoding_cp51932);
6174
- ENCODING1("CP65001", pm_encoding_utf_8);
6175
- break;
6176
- case 'E': case 'e':
6177
- ENCODING2("EUC-JP", "eucJP", pm_encoding_euc_jp);
6178
- ENCODING1("external", pm_encoding_utf_8);
6179
- break;
6180
- case 'F': case 'f':
6181
- ENCODING1("filesystem", pm_encoding_utf_8);
6182
- break;
6183
- case 'G': case 'g':
6184
- ENCODING1("GB1988", pm_encoding_gb1988);
6185
- ENCODING1("GBK", pm_encoding_gbk);
6186
- break;
6187
- case 'I': case 'i':
6188
- ENCODING1("IBM437", pm_encoding_ibm437);
6189
- ENCODING1("IBM720", pm_encoding_ibm720);
6190
- ENCODING1("IBM737", pm_encoding_ibm737);
6191
- ENCODING1("IBM775", pm_encoding_ibm775);
6192
- ENCODING1("IBM850", pm_encoding_cp850);
6193
- ENCODING1("IBM852", pm_encoding_ibm852);
6194
- ENCODING1("IBM855", pm_encoding_ibm855);
6195
- ENCODING1("IBM857", pm_encoding_ibm857);
6196
- ENCODING1("IBM860", pm_encoding_ibm860);
6197
- ENCODING1("IBM861", pm_encoding_ibm861);
6198
- ENCODING1("IBM862", pm_encoding_ibm862);
6199
- ENCODING1("IBM863", pm_encoding_ibm863);
6200
- ENCODING1("IBM864", pm_encoding_ibm864);
6201
- ENCODING1("IBM865", pm_encoding_ibm865);
6202
- ENCODING1("IBM866", pm_encoding_ibm866);
6203
- ENCODING1("IBM869", pm_encoding_ibm869);
6204
- ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
6205
- ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
6206
- ENCODING2("ISO-8859-3", "ISO8859-3", pm_encoding_iso_8859_3);
6207
- ENCODING2("ISO-8859-4", "ISO8859-4", pm_encoding_iso_8859_4);
6208
- ENCODING2("ISO-8859-5", "ISO8859-5", pm_encoding_iso_8859_5);
6209
- ENCODING2("ISO-8859-6", "ISO8859-6", pm_encoding_iso_8859_6);
6210
- ENCODING2("ISO-8859-7", "ISO8859-7", pm_encoding_iso_8859_7);
6211
- ENCODING2("ISO-8859-8", "ISO8859-8", pm_encoding_iso_8859_8);
6212
- ENCODING2("ISO-8859-9", "ISO8859-9", pm_encoding_iso_8859_9);
6213
- ENCODING2("ISO-8859-10", "ISO8859-10", pm_encoding_iso_8859_10);
6214
- ENCODING2("ISO-8859-11", "ISO8859-11", pm_encoding_iso_8859_11);
6215
- ENCODING2("ISO-8859-13", "ISO8859-13", pm_encoding_iso_8859_13);
6216
- ENCODING2("ISO-8859-14", "ISO8859-14", pm_encoding_iso_8859_14);
6217
- ENCODING2("ISO-8859-15", "ISO8859-15", pm_encoding_iso_8859_15);
6218
- ENCODING2("ISO-8859-16", "ISO8859-16", pm_encoding_iso_8859_16);
6219
- break;
6220
- case 'K': case 'k':
6221
- ENCODING1("KOI8-R", pm_encoding_koi8_r);
6222
- break;
6223
- case 'L': case 'l':
6224
- ENCODING1("locale", pm_encoding_utf_8);
6225
- break;
6226
- case 'M': case 'm':
6227
- ENCODING1("macCentEuro", pm_encoding_mac_cent_euro);
6228
- ENCODING1("macCroatian", pm_encoding_mac_croatian);
6229
- ENCODING1("macCyrillic", pm_encoding_mac_cyrillic);
6230
- ENCODING1("macGreek", pm_encoding_mac_greek);
6231
- ENCODING1("macIceland", pm_encoding_mac_iceland);
6232
- ENCODING1("macRoman", pm_encoding_mac_roman);
6233
- ENCODING1("macRomania", pm_encoding_mac_romania);
6234
- ENCODING1("macThai", pm_encoding_mac_thai);
6235
- ENCODING1("macTurkish", pm_encoding_mac_turkish);
6236
- ENCODING1("macUkraine", pm_encoding_mac_ukraine);
6237
- break;
6238
- case 'P': case 'p':
6239
- ENCODING1("PCK", pm_encoding_windows_31j);
6240
- break;
6241
- case 'S': case 's':
6242
- ENCODING1("Shift_JIS", pm_encoding_shift_jis);
6243
- ENCODING1("SJIS", pm_encoding_windows_31j);
6244
- break;
6245
- case 'T': case 't':
6246
- ENCODING1("TIS-620", pm_encoding_tis_620);
6247
- break;
6248
- case 'U': case 'u':
6249
- ENCODING1("US-ASCII", pm_encoding_ascii);
6250
- ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
6251
- break;
6252
- case 'W': case 'w':
6253
- ENCODING1("Windows-31J", pm_encoding_windows_31j);
6254
- ENCODING1("Windows-874", pm_encoding_windows_874);
6255
- ENCODING1("Windows-1250", pm_encoding_windows_1250);
6256
- ENCODING1("Windows-1251", pm_encoding_windows_1251);
6257
- ENCODING1("Windows-1252", pm_encoding_windows_1252);
6258
- ENCODING1("Windows-1253", pm_encoding_windows_1253);
6259
- ENCODING1("Windows-1254", pm_encoding_windows_1254);
6260
- ENCODING1("Windows-1255", pm_encoding_windows_1255);
6261
- ENCODING1("Windows-1256", pm_encoding_windows_1256);
6262
- ENCODING1("Windows-1257", pm_encoding_windows_1257);
6263
- ENCODING1("Windows-1258", pm_encoding_windows_1258);
6264
- break;
6265
- case '6':
6266
- ENCODING1("646", pm_encoding_ascii);
6267
- break;
6268
- }
6269
- }
6270
-
6271
- #undef ENCODING2
6272
- #undef ENCODING1
6273
-
6274
6255
  return false;
6275
6256
  }
6276
6257
 
@@ -6319,7 +6300,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6319
6300
  }
6320
6301
 
6321
6302
  const uint8_t *value_start = cursor;
6322
- while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end);
6303
+ while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
6323
6304
 
6324
6305
  if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
6325
6306
  // If we were unable to parse the encoding value, then we've got an
@@ -6353,7 +6334,7 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
6353
6334
  */
6354
6335
  static inline const uint8_t *
6355
6336
  parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
6356
- while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
6337
+ while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
6357
6338
  if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
6358
6339
  return cursor;
6359
6340
  }
@@ -6443,7 +6424,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6443
6424
  // underscores. We only need to do this if there _is_ a dash in the key.
6444
6425
  pm_string_t key;
6445
6426
  const size_t key_length = (size_t) (key_end - key_start);
6446
- const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, &parser->encoding);
6427
+ const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, parser->encoding);
6447
6428
 
6448
6429
  if (dash == NULL) {
6449
6430
  pm_string_shared_init(&key, key_start, key_end);
@@ -6455,7 +6436,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6455
6436
  memcpy(buffer, key_start, width);
6456
6437
  buffer[dash - key_start] = '_';
6457
6438
 
6458
- while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, &parser->encoding)) != NULL) {
6439
+ while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
6459
6440
  buffer[dash - key_start] = '_';
6460
6441
  }
6461
6442
 
@@ -6530,6 +6511,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6530
6511
  case PM_CONTEXT_ELSE:
6531
6512
  case PM_CONTEXT_FOR:
6532
6513
  case PM_CONTEXT_ENSURE:
6514
+ case PM_CONTEXT_ENSURE_DEF:
6533
6515
  return token->type == PM_TOKEN_KEYWORD_END;
6534
6516
  case PM_CONTEXT_FOR_INDEX:
6535
6517
  return token->type == PM_TOKEN_KEYWORD_IN;
@@ -6550,8 +6532,10 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6550
6532
  return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
6551
6533
  case PM_CONTEXT_BEGIN:
6552
6534
  case PM_CONTEXT_RESCUE:
6535
+ case PM_CONTEXT_RESCUE_DEF:
6553
6536
  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
6554
6537
  case PM_CONTEXT_RESCUE_ELSE:
6538
+ case PM_CONTEXT_RESCUE_ELSE_DEF:
6555
6539
  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
6556
6540
  case PM_CONTEXT_LAMBDA_BRACES:
6557
6541
  return token->type == PM_TOKEN_BRACE_RIGHT;
@@ -6617,6 +6601,10 @@ context_def_p(pm_parser_t *parser) {
6617
6601
  while (context_node != NULL) {
6618
6602
  switch (context_node->context) {
6619
6603
  case PM_CONTEXT_DEF:
6604
+ case PM_CONTEXT_DEF_PARAMS:
6605
+ case PM_CONTEXT_ENSURE_DEF:
6606
+ case PM_CONTEXT_RESCUE_DEF:
6607
+ case PM_CONTEXT_RESCUE_ELSE_DEF:
6620
6608
  return true;
6621
6609
  case PM_CONTEXT_CLASS:
6622
6610
  case PM_CONTEXT_MODULE:
@@ -6979,9 +6967,16 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6979
6967
  const uint8_t *end = parser->end;
6980
6968
  const uint8_t *current_start = parser->current.start;
6981
6969
  const uint8_t *current_end = parser->current.end;
6970
+ bool encoding_changed = parser->encoding_changed;
6982
6971
 
6983
- while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6984
- current_end += width;
6972
+ if (encoding_changed) {
6973
+ while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6974
+ current_end += width;
6975
+ }
6976
+ } else {
6977
+ while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
6978
+ current_end += width;
6979
+ }
6985
6980
  }
6986
6981
  parser->current.end = current_end;
6987
6982
 
@@ -7099,8 +7094,8 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
7099
7094
  }
7100
7095
  }
7101
7096
 
7102
- if (parser->encoding_changed) {
7103
- return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7097
+ if (encoding_changed) {
7098
+ return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7104
7099
  }
7105
7100
  return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7106
7101
  }
@@ -7314,7 +7309,18 @@ escape_byte(uint8_t value, const uint8_t flags) {
7314
7309
  * Write a unicode codepoint to the given buffer.
7315
7310
  */
7316
7311
  static inline void
7317
- escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
7312
+ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
7313
+ // \u escape sequences in string-like structures implicitly change the
7314
+ // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
7315
+ // literal.
7316
+ if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
7317
+ if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
7318
+ PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
7319
+ }
7320
+
7321
+ parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
7322
+ }
7323
+
7318
7324
  if (value <= 0x7F) { // 0xxxxxxx
7319
7325
  pm_buffer_append_byte(buffer, (uint8_t) value);
7320
7326
  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
@@ -7337,6 +7343,23 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *st
7337
7343
  }
7338
7344
  }
7339
7345
 
7346
+ /**
7347
+ * When you're writing a byte to the unescape buffer, if the byte is non-ASCII
7348
+ * (i.e., the top bit is set) then it locks in the encoding.
7349
+ */
7350
+ static inline void
7351
+ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
7352
+ if (byte >= 0x80) {
7353
+ if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7354
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
7355
+ }
7356
+
7357
+ parser->explicit_encoding = parser->encoding;
7358
+ }
7359
+
7360
+ pm_buffer_append_byte(buffer, byte);
7361
+ }
7362
+
7340
7363
  /**
7341
7364
  * The regular expression engine doesn't support the same escape sequences as
7342
7365
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -7353,7 +7376,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *st
7353
7376
  * source so that the regular expression engine will perform its own unescaping.
7354
7377
  */
7355
7378
  static inline void
7356
- escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7379
+ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7357
7380
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7358
7381
  pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
7359
7382
 
@@ -7372,7 +7395,7 @@ escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7372
7395
  pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
7373
7396
  }
7374
7397
  } else {
7375
- pm_buffer_append_byte(buffer, byte);
7398
+ escape_write_byte_encoded(parser, buffer, byte);
7376
7399
  }
7377
7400
  }
7378
7401
 
@@ -7384,57 +7407,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7384
7407
  switch (peek(parser)) {
7385
7408
  case '\\': {
7386
7409
  parser->current.end++;
7387
- pm_buffer_append_byte(buffer, '\\');
7410
+ escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
7388
7411
  return;
7389
7412
  }
7390
7413
  case '\'': {
7391
7414
  parser->current.end++;
7392
- pm_buffer_append_byte(buffer, '\'');
7415
+ escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
7393
7416
  return;
7394
7417
  }
7395
7418
  case 'a': {
7396
7419
  parser->current.end++;
7397
- pm_buffer_append_byte(buffer, '\a');
7420
+ escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
7398
7421
  return;
7399
7422
  }
7400
7423
  case 'b': {
7401
7424
  parser->current.end++;
7402
- pm_buffer_append_byte(buffer, '\b');
7425
+ escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
7403
7426
  return;
7404
7427
  }
7405
7428
  case 'e': {
7406
7429
  parser->current.end++;
7407
- pm_buffer_append_byte(buffer, '\033');
7430
+ escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
7408
7431
  return;
7409
7432
  }
7410
7433
  case 'f': {
7411
7434
  parser->current.end++;
7412
- pm_buffer_append_byte(buffer, '\f');
7435
+ escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
7413
7436
  return;
7414
7437
  }
7415
7438
  case 'n': {
7416
7439
  parser->current.end++;
7417
- pm_buffer_append_byte(buffer, '\n');
7440
+ escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7418
7441
  return;
7419
7442
  }
7420
7443
  case 'r': {
7421
7444
  parser->current.end++;
7422
- pm_buffer_append_byte(buffer, '\r');
7445
+ escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
7423
7446
  return;
7424
7447
  }
7425
7448
  case 's': {
7426
7449
  parser->current.end++;
7427
- pm_buffer_append_byte(buffer, ' ');
7450
+ escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
7428
7451
  return;
7429
7452
  }
7430
7453
  case 't': {
7431
7454
  parser->current.end++;
7432
- pm_buffer_append_byte(buffer, '\t');
7455
+ escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
7433
7456
  return;
7434
7457
  }
7435
7458
  case 'v': {
7436
7459
  parser->current.end++;
7437
- pm_buffer_append_byte(buffer, '\v');
7460
+ escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
7438
7461
  return;
7439
7462
  }
7440
7463
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -7451,7 +7474,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7451
7474
  }
7452
7475
  }
7453
7476
 
7454
- pm_buffer_append_byte(buffer, value);
7477
+ escape_write_byte_encoded(parser, buffer, value);
7455
7478
  return;
7456
7479
  }
7457
7480
  case 'x': {
@@ -7473,7 +7496,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7473
7496
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7474
7497
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
7475
7498
  } else {
7476
- pm_buffer_append_byte(buffer, value);
7499
+ escape_write_byte_encoded(parser, buffer, value);
7477
7500
  }
7478
7501
  } else {
7479
7502
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
@@ -7497,7 +7520,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7497
7520
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7498
7521
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
7499
7522
  } else {
7500
- escape_write_unicode(parser, buffer, start, parser->current.end + 4, value);
7523
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
7501
7524
  }
7502
7525
 
7503
7526
  parser->current.end += 4;
@@ -7531,13 +7554,14 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7531
7554
 
7532
7555
  if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
7533
7556
  uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
7534
- escape_write_unicode(parser, buffer, unicode_start, parser->current.end, value);
7557
+ escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
7535
7558
  }
7536
7559
 
7537
7560
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
7538
7561
  }
7539
7562
 
7540
- // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
7563
+ // ?\u{nnnn} character literal should contain only one codepoint
7564
+ // and cannot be like ?\u{nnnn mmmm}.
7541
7565
  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
7542
7566
  pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
7543
7567
  }
@@ -7568,7 +7592,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7568
7592
  switch (peeked) {
7569
7593
  case '?': {
7570
7594
  parser->current.end++;
7571
- escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
7595
+ escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7572
7596
  return;
7573
7597
  }
7574
7598
  case '\\':
@@ -7586,7 +7610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7586
7610
  }
7587
7611
 
7588
7612
  parser->current.end++;
7589
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7613
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7590
7614
  return;
7591
7615
  }
7592
7616
  }
@@ -7608,7 +7632,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7608
7632
  switch (peeked) {
7609
7633
  case '?': {
7610
7634
  parser->current.end++;
7611
- escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
7635
+ escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7612
7636
  return;
7613
7637
  }
7614
7638
  case '\\':
@@ -7626,7 +7650,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7626
7650
  }
7627
7651
 
7628
7652
  parser->current.end++;
7629
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7653
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7630
7654
  return;
7631
7655
  }
7632
7656
  }
@@ -7661,20 +7685,20 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7661
7685
  }
7662
7686
 
7663
7687
  parser->current.end++;
7664
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
7688
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
7665
7689
  return;
7666
7690
  }
7667
7691
  case '\r': {
7668
7692
  if (peek_offset(parser, 1) == '\n') {
7669
7693
  parser->current.end += 2;
7670
- pm_buffer_append_byte(buffer, '\n');
7694
+ escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7671
7695
  return;
7672
7696
  }
7673
7697
  }
7674
7698
  /* fallthrough */
7675
7699
  default: {
7676
7700
  if (parser->current.end < parser->end) {
7677
- pm_buffer_append_byte(buffer, *parser->current.end++);
7701
+ escape_write_byte_encoded(parser, buffer, *parser->current.end++);
7678
7702
  }
7679
7703
  return;
7680
7704
  }
@@ -7737,13 +7761,12 @@ lex_question_mark(pm_parser_t *parser) {
7737
7761
 
7738
7762
  return PM_TOKEN_CHARACTER_LITERAL;
7739
7763
  } else {
7740
- size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
7764
+ size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7741
7765
 
7742
- // Ternary operators can have a ? immediately followed by an identifier which starts with
7743
- // an underscore. We check for this case
7766
+ // Ternary operators can have a ? immediately followed by an identifier
7767
+ // which starts with an underscore. We check for this case here.
7744
7768
  if (
7745
- !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
7746
- peek(parser) == '_') ||
7769
+ !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
7747
7770
  (
7748
7771
  (parser->current.end + encoding_width >= parser->end) ||
7749
7772
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -7809,8 +7832,7 @@ parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
7809
7832
 
7810
7833
  *comment = (pm_comment_t) {
7811
7834
  .type = type,
7812
- .start = parser->current.start,
7813
- .end = parser->current.end
7835
+ .location = { parser->current.start, parser->current.end }
7814
7836
  };
7815
7837
 
7816
7838
  return comment;
@@ -7861,7 +7883,7 @@ lex_embdoc(pm_parser_t *parser) {
7861
7883
  parser->current.type = PM_TOKEN_EMBDOC_END;
7862
7884
  parser_lex_callback(parser);
7863
7885
 
7864
- comment->end = parser->current.end;
7886
+ comment->location.end = parser->current.end;
7865
7887
  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7866
7888
 
7867
7889
  return PM_TOKEN_EMBDOC_END;
@@ -7884,7 +7906,7 @@ lex_embdoc(pm_parser_t *parser) {
7884
7906
 
7885
7907
  pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
7886
7908
 
7887
- comment->end = parser->current.end;
7909
+ comment->location.end = parser->current.end;
7888
7910
  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7889
7911
 
7890
7912
  return PM_TOKEN_EOF;
@@ -8592,6 +8614,7 @@ parser_lex(pm_parser_t *parser) {
8592
8614
  // TODO: handle unterminated heredoc
8593
8615
  }
8594
8616
 
8617
+ parser->explicit_encoding = NULL;
8595
8618
  lex_mode_push(parser, (pm_lex_mode_t) {
8596
8619
  .mode = PM_LEX_HEREDOC,
8597
8620
  .as.heredoc = {
@@ -8998,7 +9021,7 @@ parser_lex(pm_parser_t *parser) {
8998
9021
  (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
8999
9022
  lex_state_spcarg_p(parser, space_seen)
9000
9023
  ) {
9001
- if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
9024
+ if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
9002
9025
  if (*parser->current.end >= 0x80) {
9003
9026
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9004
9027
  }
@@ -9021,7 +9044,7 @@ parser_lex(pm_parser_t *parser) {
9021
9044
  // Delimiters for %-literals cannot be alphanumeric. We
9022
9045
  // validate that here.
9023
9046
  uint8_t delimiter = peek_offset(parser, 1);
9024
- if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
9047
+ if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
9025
9048
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9026
9049
  goto lex_next_token;
9027
9050
  }
@@ -9207,8 +9230,8 @@ parser_lex(pm_parser_t *parser) {
9207
9230
  parser->current.type = PM_TOKEN___END__;
9208
9231
  parser_lex_callback(parser);
9209
9232
 
9210
- pm_comment_t *comment = parser_comment(parser, PM_COMMENT___END__);
9211
- pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9233
+ parser->data_loc.start = parser->current.start;
9234
+ parser->data_loc.end = parser->current.end;
9212
9235
 
9213
9236
  LEX(PM_TOKEN_EOF);
9214
9237
  }
@@ -9437,7 +9460,9 @@ parser_lex(pm_parser_t *parser) {
9437
9460
 
9438
9461
  // If we were unable to find a breakpoint, then this token hits the
9439
9462
  // end of the file.
9440
- LEX(PM_TOKEN_EOF);
9463
+ parser->current.end = parser->end;
9464
+ pm_token_buffer_flush(parser, &token_buffer);
9465
+ LEX(PM_TOKEN_STRING_CONTENT);
9441
9466
  }
9442
9467
  case PM_LEX_REGEXP: {
9443
9468
  // First, we'll set to start of this token to be the current end.
@@ -9545,7 +9570,9 @@ parser_lex(pm_parser_t *parser) {
9545
9570
  case '\r':
9546
9571
  parser->current.end++;
9547
9572
  if (peek(parser) != '\n') {
9548
- pm_token_buffer_push(&token_buffer, '\\');
9573
+ if (lex_mode->as.regexp.terminator != '\r') {
9574
+ pm_token_buffer_push(&token_buffer, '\\');
9575
+ }
9549
9576
  pm_token_buffer_push(&token_buffer, '\r');
9550
9577
  break;
9551
9578
  }
@@ -9573,7 +9600,20 @@ parser_lex(pm_parser_t *parser) {
9573
9600
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
9574
9601
  break;
9575
9602
  default:
9576
- if (lex_mode->as.regexp.terminator == '/' && peeked == '/') {
9603
+ if (lex_mode->as.regexp.terminator == peeked) {
9604
+ // Some characters when they are used as the
9605
+ // terminator also receive an escape. They are
9606
+ // enumerated here.
9607
+ switch (peeked) {
9608
+ case '$': case ')': case '*': case '+':
9609
+ case '.': case '>': case '?': case ']':
9610
+ case '^': case '|': case '}':
9611
+ pm_token_buffer_push(&token_buffer, '\\');
9612
+ break;
9613
+ default:
9614
+ break;
9615
+ }
9616
+
9577
9617
  pm_token_buffer_push(&token_buffer, peeked);
9578
9618
  parser->current.end++;
9579
9619
  break;
@@ -9626,7 +9666,9 @@ parser_lex(pm_parser_t *parser) {
9626
9666
 
9627
9667
  // If we were unable to find a breakpoint, then this token hits the
9628
9668
  // end of the file.
9629
- LEX(PM_TOKEN_EOF);
9669
+ parser->current.end = parser->end;
9670
+ pm_token_buffer_flush(parser, &token_buffer);
9671
+ LEX(PM_TOKEN_STRING_CONTENT);
9630
9672
  }
9631
9673
  case PM_LEX_STRING: {
9632
9674
  // First, we'll set to start of this token to be the current end.
@@ -9830,8 +9872,10 @@ parser_lex(pm_parser_t *parser) {
9830
9872
  }
9831
9873
 
9832
9874
  // If we've hit the end of the string, then this is an unterminated
9833
- // string. In that case we'll return the EOF token.
9834
- LEX(PM_TOKEN_EOF);
9875
+ // string. In that case we'll return a string content token.
9876
+ parser->current.end = parser->end;
9877
+ pm_token_buffer_flush(parser, &token_buffer);
9878
+ LEX(PM_TOKEN_STRING_CONTENT);
9835
9879
  }
9836
9880
  case PM_LEX_HEREDOC: {
9837
9881
  // First, we'll set to start of this token.
@@ -9860,24 +9904,42 @@ parser_lex(pm_parser_t *parser) {
9860
9904
  // terminator, then we need to return the ending of the heredoc.
9861
9905
  if (current_token_starts_line(parser)) {
9862
9906
  const uint8_t *start = parser->current.start;
9863
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9907
+ if (start + ident_length <= parser->end) {
9908
+ const uint8_t *newline = next_newline(start, parser->end - start);
9909
+ const uint8_t *ident_end = newline;
9910
+ const uint8_t *terminator_end = newline;
9911
+
9912
+ if (newline == NULL) {
9913
+ terminator_end = parser->end;
9914
+ ident_end = parser->end;
9915
+ } else {
9916
+ terminator_end++;
9917
+ if (newline[-1] == '\r') {
9918
+ ident_end--; // Remove \r
9919
+ }
9920
+ }
9864
9921
 
9865
- if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
9866
- bool matched = true;
9867
- bool at_end = false;
9922
+ const uint8_t *terminator_start = ident_end - ident_length;
9923
+ const uint8_t *cursor = start;
9868
9924
 
9869
- size_t eol_length = match_eol_at(parser, start + ident_length);
9870
- if (eol_length) {
9871
- parser->current.end = start + ident_length + eol_length;
9872
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9873
- } else if (parser->end == (start + ident_length)) {
9874
- parser->current.end = start + ident_length;
9875
- at_end = true;
9876
- } else {
9877
- matched = false;
9925
+ if (
9926
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
9927
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
9928
+ ) {
9929
+ while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
9930
+ cursor++;
9931
+ }
9878
9932
  }
9879
9933
 
9880
- if (matched) {
9934
+ if (
9935
+ (cursor == terminator_start) &&
9936
+ (memcmp(terminator_start, ident_start, ident_length) == 0)
9937
+ ) {
9938
+ if (newline != NULL) {
9939
+ pm_newline_list_append(&parser->newline_list, newline);
9940
+ }
9941
+
9942
+ parser->current.end = terminator_end;
9881
9943
  if (*lex_mode->as.heredoc.next_start == '\\') {
9882
9944
  parser->next_start = NULL;
9883
9945
  } else {
@@ -9885,15 +9947,12 @@ parser_lex(pm_parser_t *parser) {
9885
9947
  parser->heredoc_end = parser->current.end;
9886
9948
  }
9887
9949
 
9888
- parser->current_string_common_whitespace = parser->lex_modes.current->as.heredoc.common_whitespace;
9889
- lex_mode_pop(parser);
9890
- if (!at_end) {
9891
- lex_state_set(parser, PM_LEX_STATE_END);
9892
- }
9950
+ lex_state_set(parser, PM_LEX_STATE_END);
9893
9951
  LEX(PM_TOKEN_HEREDOC_END);
9894
9952
  }
9895
9953
  }
9896
9954
 
9955
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9897
9956
  if (
9898
9957
  lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
9899
9958
  (lex_mode->as.heredoc.common_whitespace > whitespace) &&
@@ -9937,23 +9996,35 @@ parser_lex(pm_parser_t *parser) {
9937
9996
  // If we have a - or ~ heredoc, then we can match after
9938
9997
  // some leading whitespace.
9939
9998
  const uint8_t *start = breakpoint + 1;
9940
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9941
9999
 
9942
- // If we have hit a newline that is followed by a valid
9943
- // terminator, then we need to return the content of the
9944
- // heredoc here as string content. Then, the next time a
9945
- // token is lexed, it will match again and return the
9946
- // end of the heredoc.
9947
- if (
9948
- !was_escaped_newline &&
9949
- (start + ident_length <= parser->end) &&
9950
- (memcmp(start, ident_start, ident_length) == 0)
9951
- ) {
9952
- // Heredoc terminators must be followed by a
9953
- // newline, CRLF, or EOF to be valid.
10000
+ if (!was_escaped_newline && (start + ident_length <= parser->end)) {
10001
+ // We want to match the terminator starting from the end of the line in case
10002
+ // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
10003
+ const uint8_t *newline = next_newline(start, parser->end - start);
10004
+
10005
+ if (newline == NULL) {
10006
+ newline = parser->end;
10007
+ } else if (newline[-1] == '\r') {
10008
+ newline--; // Remove \r
10009
+ }
10010
+
10011
+ // Start of a possible terminator.
10012
+ const uint8_t *terminator_start = newline - ident_length;
10013
+
10014
+ // Cursor to check for the leading whitespace. We skip the
10015
+ // leading whitespace if we have a - or ~ heredoc.
10016
+ const uint8_t *cursor = start;
10017
+
10018
+ if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
10019
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
10020
+ while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
10021
+ cursor++;
10022
+ }
10023
+ }
10024
+
9954
10025
  if (
9955
- start + ident_length == parser->end ||
9956
- match_eol_at(parser, start + ident_length)
10026
+ cursor == terminator_start &&
10027
+ (memcmp(terminator_start, ident_start, ident_length) == 0)
9957
10028
  ) {
9958
10029
  parser->current.end = breakpoint + 1;
9959
10030
  pm_token_buffer_flush(parser, &token_buffer);
@@ -9961,6 +10032,14 @@ parser_lex(pm_parser_t *parser) {
9961
10032
  }
9962
10033
  }
9963
10034
 
10035
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
10036
+
10037
+ // If we have hit a newline that is followed by a valid
10038
+ // terminator, then we need to return the content of the
10039
+ // heredoc here as string content. Then, the next time a
10040
+ // token is lexed, it will match again and return the
10041
+ // end of the heredoc.
10042
+
9964
10043
  if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
9965
10044
  if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
9966
10045
  lex_mode->as.heredoc.common_whitespace = whitespace;
@@ -10078,8 +10157,10 @@ parser_lex(pm_parser_t *parser) {
10078
10157
  }
10079
10158
 
10080
10159
  // If we've hit the end of the string, then this is an unterminated
10081
- // heredoc. In that case we'll return the EOF token.
10082
- LEX(PM_TOKEN_EOF);
10160
+ // heredoc. In that case we'll return a string content token.
10161
+ parser->current.end = parser->end;
10162
+ pm_token_buffer_flush(parser, &token_buffer);
10163
+ LEX(PM_TOKEN_STRING_CONTENT);
10083
10164
  }
10084
10165
  }
10085
10166
 
@@ -10101,32 +10182,33 @@ parser_lex(pm_parser_t *parser) {
10101
10182
  * specify their associativity by adding or subtracting one.
10102
10183
  */
10103
10184
  typedef enum {
10104
- PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10105
- PM_BINDING_POWER_STATEMENT = 2,
10106
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10107
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10108
- PM_BINDING_POWER_COMPOSITION = 8, // and or
10109
- PM_BINDING_POWER_NOT = 10, // not
10110
- PM_BINDING_POWER_MATCH = 12, // => in
10111
- PM_BINDING_POWER_DEFINED = 14, // defined?
10112
- PM_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
10113
- PM_BINDING_POWER_TERNARY = 18, // ?:
10114
- PM_BINDING_POWER_RANGE = 20, // .. ...
10115
- PM_BINDING_POWER_LOGICAL_OR = 22, // ||
10116
- PM_BINDING_POWER_LOGICAL_AND = 24, // &&
10117
- PM_BINDING_POWER_EQUALITY = 26, // <=> == === != =~ !~
10118
- PM_BINDING_POWER_COMPARISON = 28, // > >= < <=
10119
- PM_BINDING_POWER_BITWISE_OR = 30, // | ^
10120
- PM_BINDING_POWER_BITWISE_AND = 32, // &
10121
- PM_BINDING_POWER_SHIFT = 34, // << >>
10122
- PM_BINDING_POWER_TERM = 36, // + -
10123
- PM_BINDING_POWER_FACTOR = 38, // * / %
10124
- PM_BINDING_POWER_UMINUS = 40, // -@
10125
- PM_BINDING_POWER_EXPONENT = 42, // **
10126
- PM_BINDING_POWER_UNARY = 44, // ! ~ +@
10127
- PM_BINDING_POWER_INDEX = 46, // [] []=
10128
- PM_BINDING_POWER_CALL = 48, // :: .
10129
- PM_BINDING_POWER_MAX = 50
10185
+ PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
+ PM_BINDING_POWER_STATEMENT = 2,
10187
+ PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
+ PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10189
+ PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
+ PM_BINDING_POWER_NOT = 10, // not
10191
+ PM_BINDING_POWER_MATCH = 12, // => in
10192
+ PM_BINDING_POWER_DEFINED = 14, // defined?
10193
+ PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
10194
+ PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
10195
+ PM_BINDING_POWER_TERNARY = 20, // ?:
10196
+ PM_BINDING_POWER_RANGE = 22, // .. ...
10197
+ PM_BINDING_POWER_LOGICAL_OR = 24, // ||
10198
+ PM_BINDING_POWER_LOGICAL_AND = 26, // &&
10199
+ PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
10200
+ PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
10201
+ PM_BINDING_POWER_BITWISE_OR = 32, // | ^
10202
+ PM_BINDING_POWER_BITWISE_AND = 34, // &
10203
+ PM_BINDING_POWER_SHIFT = 36, // << >>
10204
+ PM_BINDING_POWER_TERM = 38, // + -
10205
+ PM_BINDING_POWER_FACTOR = 40, // * / %
10206
+ PM_BINDING_POWER_UMINUS = 42, // -@
10207
+ PM_BINDING_POWER_EXPONENT = 44, // **
10208
+ PM_BINDING_POWER_UNARY = 46, // ! ~ +@
10209
+ PM_BINDING_POWER_INDEX = 48, // [] []=
10210
+ PM_BINDING_POWER_CALL = 50, // :: .
10211
+ PM_BINDING_POWER_MAX = 52
10130
10212
  } pm_binding_power_t;
10131
10213
 
10132
10214
  /**
@@ -10153,7 +10235,7 @@ typedef struct {
10153
10235
  #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
10154
10236
  #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
10155
10237
  #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
10156
- #define NON_ASSOCIATIVE(precedence) { precedence + 1, precedence + 1, true, true }
10238
+ #define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
10157
10239
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10158
10240
 
10159
10241
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
@@ -10196,6 +10278,8 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10196
10278
  // .. ...
10197
10279
  [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10198
10280
  [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10281
+ [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10282
+ [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10199
10283
 
10200
10284
  // ||
10201
10285
  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
@@ -10204,12 +10288,12 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10204
10288
  [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
10205
10289
 
10206
10290
  // != !~ == === =~ <=>
10207
- [PM_TOKEN_BANG_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10208
- [PM_TOKEN_BANG_TILDE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10209
- [PM_TOKEN_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10210
- [PM_TOKEN_EQUAL_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10211
- [PM_TOKEN_EQUAL_TILDE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10212
- [PM_TOKEN_LESS_EQUAL_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10291
+ [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10292
+ [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10293
+ [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10294
+ [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10295
+ [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10296
+ [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10213
10297
 
10214
10298
  // > >= < <=
10215
10299
  [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
@@ -10289,6 +10373,14 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10289
10373
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
10290
10374
  }
10291
10375
 
10376
+ /**
10377
+ * Returns true if the current token is any of the four given types.
10378
+ */
10379
+ static inline bool
10380
+ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10381
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10382
+ }
10383
+
10292
10384
  /**
10293
10385
  * Returns true if the current token is any of the five given types.
10294
10386
  */
@@ -10414,14 +10506,14 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
10414
10506
  }
10415
10507
 
10416
10508
  static pm_node_t *
10417
- parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id);
10509
+ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
10418
10510
 
10419
10511
  /**
10420
10512
  * This is a wrapper of parse_expression, which also checks whether the resulting node is value expression.
10421
10513
  */
10422
10514
  static pm_node_t *
10423
- parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10424
- pm_node_t *node = parse_expression(parser, binding_power, diag_id);
10515
+ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10516
+ pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, diag_id);
10425
10517
  pm_assert_value_expression(parser, node);
10426
10518
  return node;
10427
10519
  }
@@ -10506,14 +10598,14 @@ token_begins_expression_p(pm_token_type_t type) {
10506
10598
  * prefixed by the * operator.
10507
10599
  */
10508
10600
  static pm_node_t *
10509
- parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10601
+ parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10510
10602
  if (accept1(parser, PM_TOKEN_USTAR)) {
10511
10603
  pm_token_t operator = parser->previous;
10512
- pm_node_t *expression = parse_value_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10604
+ pm_node_t *expression = parse_value_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10513
10605
  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
10514
10606
  }
10515
10607
 
10516
- return parse_value_expression(parser, binding_power, diag_id);
10608
+ return parse_value_expression(parser, binding_power, accepts_command_call, diag_id);
10517
10609
  }
10518
10610
 
10519
10611
  /**
@@ -10621,7 +10713,6 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10621
10713
  pm_node_destroy(parser, target);
10622
10714
 
10623
10715
  uint32_t depth = 0;
10624
- for (pm_scope_t *scope = parser->current_scope; scope && scope->transparent; depth++, scope = scope->previous);
10625
10716
  const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
10626
10717
  target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
10627
10718
 
@@ -10632,25 +10723,17 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10632
10723
  return target;
10633
10724
  }
10634
10725
 
10635
- if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10726
+ if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10636
10727
  parse_write_name(parser, &call->name);
10637
- return (pm_node_t *) call;
10728
+ return (pm_node_t *) pm_call_target_node_create(parser, call);
10638
10729
  }
10639
10730
  }
10640
10731
 
10641
10732
  // If there is no call operator and the message is "[]" then this is
10642
10733
  // an aref expression, and we can transform it into an aset
10643
10734
  // expression.
10644
- if (
10645
- (call->call_operator_loc.start == NULL) &&
10646
- (call->message_loc.start != NULL) &&
10647
- (call->message_loc.start[0] == '[') &&
10648
- (call->message_loc.end[-1] == ']') &&
10649
- (call->block == NULL)
10650
- ) {
10651
- // Replace the name with "[]=".
10652
- call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
10653
- return target;
10735
+ if (pm_call_node_index_p(call)) {
10736
+ return (pm_node_t *) pm_index_target_node_create(parser, call);
10654
10737
  }
10655
10738
  }
10656
10739
  /* fallthrough */
@@ -10690,6 +10773,7 @@ static pm_node_t *
10690
10773
  parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
10691
10774
  switch (PM_NODE_TYPE(target)) {
10692
10775
  case PM_MISSING_NODE:
10776
+ pm_node_destroy(parser, value);
10693
10777
  return target;
10694
10778
  case PM_CLASS_VARIABLE_READ_NODE: {
10695
10779
  pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
@@ -10700,6 +10784,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10700
10784
  return (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
10701
10785
  case PM_CONSTANT_READ_NODE: {
10702
10786
  pm_constant_write_node_t *node = pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
10787
+ if (context_def_p(parser)) {
10788
+ pm_parser_err_node(parser, (pm_node_t *) node, PM_ERR_WRITE_TARGET_IN_METHOD);
10789
+ }
10703
10790
  pm_node_destroy(parser, target);
10704
10791
  return (pm_node_t *) node;
10705
10792
  }
@@ -10779,7 +10866,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10779
10866
  return target;
10780
10867
  }
10781
10868
 
10782
- if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10869
+ if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10783
10870
  // When we get here, we have a method call, because it was
10784
10871
  // previously marked as a method call but now we have an =. This
10785
10872
  // looks like:
@@ -10797,6 +10884,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10797
10884
  call->base.location.end = arguments->base.location.end;
10798
10885
 
10799
10886
  parse_write_name(parser, &call->name);
10887
+ pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10800
10888
  return (pm_node_t *) call;
10801
10889
  }
10802
10890
  }
@@ -10804,13 +10892,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10804
10892
  // If there is no call operator and the message is "[]" then this is
10805
10893
  // an aref expression, and we can transform it into an aset
10806
10894
  // expression.
10807
- if (
10808
- (call->call_operator_loc.start == NULL) &&
10809
- (call->message_loc.start != NULL) &&
10810
- (call->message_loc.start[0] == '[') &&
10811
- (call->message_loc.end[-1] == ']') &&
10812
- (call->block == NULL)
10813
- ) {
10895
+ if (pm_call_node_index_p(call)) {
10814
10896
  if (call->arguments == NULL) {
10815
10897
  call->arguments = pm_arguments_node_create(parser);
10816
10898
  }
@@ -10820,6 +10902,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10820
10902
 
10821
10903
  // Replace the name with "[]=".
10822
10904
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
10905
+ pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10823
10906
  return target;
10824
10907
  }
10825
10908
 
@@ -10852,7 +10935,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10852
10935
  */
10853
10936
  static pm_node_t *
10854
10937
  parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10855
- bool has_splat = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
10938
+ bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
10856
10939
 
10857
10940
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
10858
10941
  pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
@@ -10862,7 +10945,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10862
10945
  // Here we have a splat operator. It can have a name or be
10863
10946
  // anonymous. It can be the final target or be in the middle if
10864
10947
  // there haven't been any others yet.
10865
- if (has_splat) {
10948
+ if (has_rest) {
10866
10949
  pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
10867
10950
  }
10868
10951
 
@@ -10870,24 +10953,23 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10870
10953
  pm_node_t *name = NULL;
10871
10954
 
10872
10955
  if (token_begins_expression_p(parser->current.type)) {
10873
- name = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10956
+ name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10874
10957
  name = parse_target(parser, name);
10875
10958
  }
10876
10959
 
10877
10960
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
10878
10961
  pm_multi_target_node_targets_append(parser, result, splat);
10879
- has_splat = true;
10962
+ has_rest = true;
10880
10963
  } else if (token_begins_expression_p(parser->current.type)) {
10881
- pm_node_t *target = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
10964
+ pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
10882
10965
  target = parse_target(parser, target);
10883
10966
 
10884
10967
  pm_multi_target_node_targets_append(parser, result, target);
10885
10968
  } else if (!match1(parser, PM_TOKEN_EOF)) {
10886
10969
  // If we get here, then we have a trailing , in a multi target node.
10887
- // We need to indicate this somehow in the tree, so we'll add an
10888
- // anonymous splat.
10889
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
10890
- pm_multi_target_node_targets_append(parser, result, splat);
10970
+ // We'll set the implicit rest flag to indicate this.
10971
+ pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
10972
+ pm_multi_target_node_targets_append(parser, result, rest);
10891
10973
  break;
10892
10974
  }
10893
10975
  }
@@ -10930,7 +11012,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
10930
11012
  context_push(parser, context);
10931
11013
 
10932
11014
  while (true) {
10933
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
11015
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
10934
11016
  pm_statements_node_body_append(statements, node);
10935
11017
 
10936
11018
  // If we're recovering from a syntax error, then we need to stop parsing the
@@ -10984,7 +11066,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
10984
11066
  }
10985
11067
 
10986
11068
  /**
10987
- * Parse all of the elements of a hash. eturns true if a double splat was found.
11069
+ * Parse all of the elements of a hash. returns true if a double splat was found.
10988
11070
  */
10989
11071
  static bool
10990
11072
  parse_assocs(pm_parser_t *parser, pm_node_t *node) {
@@ -11001,7 +11083,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11001
11083
  pm_node_t *value = NULL;
11002
11084
 
11003
11085
  if (token_begins_expression_p(parser->current.type)) {
11004
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11086
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11005
11087
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
11006
11088
  pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11007
11089
  }
@@ -11019,9 +11101,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11019
11101
  pm_node_t *value = NULL;
11020
11102
 
11021
11103
  if (token_begins_expression_p(parser->current.type)) {
11022
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11104
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11023
11105
  } else {
11024
- if (parser->encoding.isupper_char(label.start, (label.end - 1) - label.start)) {
11106
+ if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
11025
11107
  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
11026
11108
  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
11027
11109
  } else {
@@ -11043,7 +11125,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11043
11125
  break;
11044
11126
  }
11045
11127
  default: {
11046
- pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
11128
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
11047
11129
  pm_token_t operator;
11048
11130
 
11049
11131
  if (pm_symbol_node_label_p(key)) {
@@ -11053,7 +11135,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11053
11135
  operator = parser->previous;
11054
11136
  }
11055
11137
 
11056
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11138
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11057
11139
  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
11058
11140
  break;
11059
11141
  }
@@ -11136,15 +11218,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11136
11218
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
11137
11219
  argument = (pm_node_t *) hash;
11138
11220
 
11139
- bool contains_keyword_splat = false;
11140
- if (!match7(parser, terminator, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
11141
- contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11142
- }
11143
-
11221
+ bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11144
11222
  parsed_bare_hash = true;
11145
11223
  parse_arguments_append(parser, arguments, argument);
11146
11224
  if (contains_keyword_splat) {
11147
- arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
11225
+ pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
11148
11226
  }
11149
11227
  break;
11150
11228
  }
@@ -11154,9 +11232,15 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11154
11232
  pm_node_t *expression = NULL;
11155
11233
 
11156
11234
  if (token_begins_expression_p(parser->current.type)) {
11157
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11158
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11159
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11235
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
+ } else {
11237
+ if (pm_parser_local_depth(parser, &operator) == -1) {
11238
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
+ pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
+ if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
+ pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
+ }
11243
+ }
11160
11244
  }
11161
11245
 
11162
11246
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11173,14 +11257,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11173
11257
  parser_lex(parser);
11174
11258
  pm_token_t operator = parser->previous;
11175
11259
 
11176
- if (match3(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON)) {
11260
+ if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11177
11261
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11178
11262
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11179
11263
  }
11180
11264
 
11181
11265
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11182
11266
  } else {
11183
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11267
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11184
11268
 
11185
11269
  if (parsed_bare_hash) {
11186
11270
  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
@@ -11200,7 +11284,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11200
11284
  // If the token begins an expression then this ... was not actually
11201
11285
  // argument forwarding but was instead a range.
11202
11286
  pm_token_t operator = parser->previous;
11203
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11287
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11204
11288
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11205
11289
  } else {
11206
11290
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
@@ -11220,7 +11304,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11220
11304
  /* fallthrough */
11221
11305
  default: {
11222
11306
  if (argument == NULL) {
11223
- argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11307
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
11224
11308
  }
11225
11309
 
11226
11310
  bool contains_keyword_splat = false;
@@ -11239,7 +11323,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11239
11323
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
11240
11324
 
11241
11325
  // Finish parsing the one we are part way through
11242
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11326
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11243
11327
 
11244
11328
  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
11245
11329
  pm_keyword_hash_node_elements_append(bare_hash, argument);
@@ -11258,7 +11342,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11342
 
11259
11343
  parse_arguments_append(parser, arguments, argument);
11260
11344
  if (contains_keyword_splat) {
11261
- arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
11345
+ pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
11262
11346
  }
11263
11347
  break;
11264
11348
  }
@@ -11310,11 +11394,14 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11310
11394
  do {
11311
11395
  pm_node_t *param;
11312
11396
 
11313
- // If we get here then we have a trailing comma. In this case we'll
11314
- // create an implicit splat node.
11397
+ // If we get here then we have a trailing comma, which isn't allowed in
11398
+ // the grammar. In other places, multi targets _do_ allow trailing
11399
+ // commas, so here we'll assume this is a mistake of the user not
11400
+ // knowing it's not allowed here.
11315
11401
  if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
11316
- param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
11402
+ param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11317
11403
  pm_multi_target_node_targets_append(parser, node, param);
11404
+ pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
11318
11405
  break;
11319
11406
  }
11320
11407
 
@@ -11545,10 +11632,14 @@ parse_parameters(
11545
11632
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11546
11633
  pm_token_t operator = parser->previous;
11547
11634
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11548
- pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11635
+ pm_constant_id_t old_param_name = parser->current_param_name;
11636
+ parser->current_param_name = pm_parser_constant_id_token(parser, &name);
11637
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11549
11638
 
11550
11639
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11551
11640
  pm_parameters_node_optionals_append(params, param);
11641
+
11642
+ parser->current_param_name = old_param_name;
11552
11643
  context_pop(parser);
11553
11644
 
11554
11645
  // If parsing the value of the parameter resulted in error recovery,
@@ -11604,7 +11695,10 @@ parse_parameters(
11604
11695
 
11605
11696
  if (token_begins_expression_p(parser->current.type)) {
11606
11697
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11607
- pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11698
+ pm_constant_id_t old_param_name = parser->current_param_name;
11699
+ parser->current_param_name = pm_parser_constant_id_token(parser, &local);
11700
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
+ parser->current_param_name = old_param_name;
11608
11702
  context_pop(parser);
11609
11703
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11610
11704
  }
@@ -11647,12 +11741,12 @@ parse_parameters(
11647
11741
  }
11648
11742
  }
11649
11743
 
11650
- pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &operator, &name);
11744
+ pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
11651
11745
  if (params->rest == NULL) {
11652
11746
  pm_parameters_node_rest_set(params, param);
11653
11747
  } else {
11654
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
11655
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
11748
+ pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
11749
+ pm_parameters_node_posts_append(params, param);
11656
11750
  }
11657
11751
 
11658
11752
  break;
@@ -11697,11 +11791,9 @@ parse_parameters(
11697
11791
  default:
11698
11792
  if (parser->previous.type == PM_TOKEN_COMMA) {
11699
11793
  if (allows_trailing_comma) {
11700
- // If we get here, then we have a trailing comma in a block
11701
- // parameter list. We need to create an anonymous rest parameter to
11702
- // represent it.
11703
- pm_token_t name = not_provided(parser);
11704
- pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &parser->previous, &name);
11794
+ // If we get here, then we have a trailing comma in a
11795
+ // block parameter list.
11796
+ pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11705
11797
 
11706
11798
  if (params->rest == NULL) {
11707
11799
  pm_parameters_node_rest_set(params, param);
@@ -11739,7 +11831,7 @@ parse_parameters(
11739
11831
  * nodes pointing to each other from the top.
11740
11832
  */
11741
11833
  static inline void
11742
- parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11834
+ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
11743
11835
  pm_rescue_node_t *current = NULL;
11744
11836
 
11745
11837
  while (accept1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
@@ -11753,7 +11845,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11753
11845
  parser_lex(parser);
11754
11846
  pm_rescue_node_operator_set(rescue, &parser->previous);
11755
11847
 
11756
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_RESCUE_VARIABLE);
11848
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11757
11849
  reference = parse_target(parser, reference);
11758
11850
 
11759
11851
  pm_rescue_node_reference_set(rescue, reference);
@@ -11771,7 +11863,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11771
11863
  // we'll attempt to parse it here and any others delimited by commas.
11772
11864
 
11773
11865
  do {
11774
- pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_RESCUE_EXPRESSION);
11866
+ pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION);
11775
11867
  pm_rescue_node_exceptions_append(rescue, expression);
11776
11868
 
11777
11869
  // If we hit a newline, then this is the end of the rescue expression. We
@@ -11783,7 +11875,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11783
11875
  if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
11784
11876
  pm_rescue_node_operator_set(rescue, &parser->previous);
11785
11877
 
11786
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_RESCUE_VARIABLE);
11878
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11787
11879
  reference = parse_target(parser, reference);
11788
11880
 
11789
11881
  pm_rescue_node_reference_set(rescue, reference);
@@ -11802,7 +11894,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11802
11894
 
11803
11895
  if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
11804
11896
  pm_accepts_block_stack_push(parser, true);
11805
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_RESCUE);
11897
+ pm_statements_node_t *statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_DEF : PM_CONTEXT_RESCUE);
11806
11898
  if (statements) {
11807
11899
  pm_rescue_node_statements_set(rescue, statements);
11808
11900
  }
@@ -11838,7 +11930,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11838
11930
  pm_statements_node_t *else_statements = NULL;
11839
11931
  if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
11840
11932
  pm_accepts_block_stack_push(parser, true);
11841
- else_statements = parse_statements(parser, PM_CONTEXT_RESCUE_ELSE);
11933
+ else_statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_ELSE_DEF : PM_CONTEXT_RESCUE_ELSE);
11842
11934
  pm_accepts_block_stack_pop(parser);
11843
11935
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11844
11936
  }
@@ -11854,7 +11946,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11854
11946
  pm_statements_node_t *ensure_statements = NULL;
11855
11947
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
11856
11948
  pm_accepts_block_stack_push(parser, true);
11857
- ensure_statements = parse_statements(parser, PM_CONTEXT_ENSURE);
11949
+ ensure_statements = parse_statements(parser, def_p ? PM_CONTEXT_ENSURE_DEF : PM_CONTEXT_ENSURE);
11858
11950
  pm_accepts_block_stack_pop(parser);
11859
11951
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11860
11952
  }
@@ -11872,10 +11964,10 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11872
11964
  }
11873
11965
 
11874
11966
  static inline pm_begin_node_t *
11875
- parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements) {
11967
+ parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
11876
11968
  pm_token_t no_begin_token = not_provided(parser);
11877
11969
  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
11878
- parse_rescues(parser, begin_node);
11970
+ parse_rescues(parser, begin_node, def_p);
11879
11971
 
11880
11972
  // All nodes within a begin node are optional, so we look
11881
11973
  // for the earliest possible node that we can use to set
@@ -11941,24 +12033,30 @@ parse_block(pm_parser_t *parser) {
11941
12033
 
11942
12034
  pm_accepts_block_stack_push(parser, true);
11943
12035
  pm_parser_scope_push(parser, false);
11944
- pm_block_parameters_node_t *parameters = NULL;
12036
+ pm_block_parameters_node_t *block_parameters = NULL;
11945
12037
 
11946
12038
  if (accept1(parser, PM_TOKEN_PIPE)) {
11947
12039
  parser->current_scope->explicit_params = true;
11948
12040
  pm_token_t block_parameters_opening = parser->previous;
11949
12041
 
11950
12042
  if (match1(parser, PM_TOKEN_PIPE)) {
11951
- parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12043
+ block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
11952
12044
  parser->command_start = true;
11953
12045
  parser_lex(parser);
11954
12046
  } else {
11955
- parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
12047
+ block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
11956
12048
  accept1(parser, PM_TOKEN_NEWLINE);
11957
12049
  parser->command_start = true;
11958
12050
  expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
11959
12051
  }
11960
12052
 
11961
- pm_block_parameters_node_closing_set(parameters, &parser->previous);
12053
+ pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
+ }
12055
+
12056
+ uint32_t locals_body_index = 0;
12057
+
12058
+ if (block_parameters) {
12059
+ locals_body_index = (uint32_t) parser->current_scope->locals.size;
11962
12060
  }
11963
12061
 
11964
12062
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -11980,17 +12078,25 @@ parse_block(pm_parser_t *parser) {
11980
12078
 
11981
12079
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
11982
12080
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
11983
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
12081
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
11984
12082
  }
11985
12083
  }
11986
12084
 
11987
12085
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
11988
12086
  }
11989
12087
 
12088
+ pm_node_t *parameters = (pm_node_t *) block_parameters;
12089
+ uint8_t maximum = parser->current_scope->numbered_parameters;
12090
+
12091
+ if (parameters == NULL && (maximum > 0)) {
12092
+ parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
+ locals_body_index = maximum;
12094
+ }
12095
+
11990
12096
  pm_constant_id_list_t locals = parser->current_scope->locals;
11991
12097
  pm_parser_scope_pop(parser);
11992
12098
  pm_accepts_block_stack_pop(parser);
11993
- return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12099
+ return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
11994
12100
  }
11995
12101
 
11996
12102
  /**
@@ -11999,7 +12105,7 @@ parse_block(pm_parser_t *parser) {
11999
12105
  * arguments, or blocks).
12000
12106
  */
12001
12107
  static bool
12002
- parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block) {
12108
+ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call) {
12003
12109
  bool found = false;
12004
12110
 
12005
12111
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
@@ -12016,7 +12122,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12016
12122
 
12017
12123
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
12018
12124
  }
12019
- } else if ((token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
12125
+ } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
12020
12126
  found |= true;
12021
12127
  pm_accepts_block_stack_push(parser, false);
12022
12128
 
@@ -12071,7 +12177,7 @@ static inline pm_node_t *
12071
12177
  parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
12072
12178
  context_push(parser, PM_CONTEXT_PREDICATE);
12073
12179
  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
12074
- pm_node_t *predicate = parse_value_expression(parser, binding_power, error_id);
12180
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, error_id);
12075
12181
 
12076
12182
  // Predicates are closed by a term, a "then", or a term and then a "then".
12077
12183
  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -12266,6 +12372,26 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
12266
12372
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
12267
12373
  case PM_NUMBERED_REFERENCE_READ_NODE
12268
12374
 
12375
+ // Assert here that the flags are the same so that we can safely switch the type
12376
+ // of the node without having to move the flags.
12377
+ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
12378
+
12379
+ /**
12380
+ * If the encoding was explicitly set through the lexing process, then we need
12381
+ * to potentially mark the string's flags to indicate how to encode it.
12382
+ */
12383
+ static inline pm_node_flags_t
12384
+ parse_unescaped_encoding(const pm_parser_t *parser) {
12385
+ if (parser->explicit_encoding != NULL) {
12386
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12387
+ return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12389
+ return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
+ }
12391
+ }
12392
+ return 0;
12393
+ }
12394
+
12269
12395
  /**
12270
12396
  * Parse a node that is part of a string. If the subsequent tokens cannot be
12271
12397
  * parsed as a string part, then NULL is returned.
@@ -12282,7 +12408,9 @@ parse_string_part(pm_parser_t *parser) {
12282
12408
  case PM_TOKEN_STRING_CONTENT: {
12283
12409
  pm_token_t opening = not_provided(parser);
12284
12410
  pm_token_t closing = not_provided(parser);
12411
+
12285
12412
  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
12413
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
12286
12414
 
12287
12415
  parser_lex(parser);
12288
12416
  return node;
@@ -12451,7 +12579,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12451
12579
  }
12452
12580
 
12453
12581
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12454
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12582
+ if (match1(parser, PM_TOKEN_EOF)) {
12583
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12584
+ } else {
12585
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12586
+ }
12455
12587
 
12456
12588
  return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
12457
12589
  }
@@ -12463,6 +12595,34 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12463
12595
  content = parser->current;
12464
12596
  unescaped = parser->current_string;
12465
12597
  parser_lex(parser);
12598
+
12599
+ // If we have two string contents in a row, then the content of this
12600
+ // symbol is split because of heredoc contents. This looks like:
12601
+ //
12602
+ // <<A; :'a
12603
+ // A
12604
+ // b'
12605
+ //
12606
+ // In this case, the best way we have to represent this is as an
12607
+ // interpolated string node, so that's what we'll do here.
12608
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
12609
+ pm_node_list_t parts = { 0 };
12610
+ pm_token_t bounds = not_provided(parser);
12611
+
12612
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
12613
+ pm_node_list_append(&parts, part);
12614
+
12615
+ part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
12616
+ pm_node_list_append(&parts, part);
12617
+
12618
+ if (next_state != PM_LEX_STATE_NONE) {
12619
+ lex_state_set(parser, next_state);
12620
+ }
12621
+
12622
+ parser_lex(parser);
12623
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12624
+ return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12625
+ }
12466
12626
  } else {
12467
12627
  content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
12468
12628
  pm_string_shared_init(&unescaped, content.start, content.end);
@@ -12472,7 +12632,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12472
12632
  lex_state_set(parser, next_state);
12473
12633
  }
12474
12634
 
12475
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12635
+ if (match1(parser, PM_TOKEN_EOF)) {
12636
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
12637
+ } else {
12638
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
+ }
12476
12640
  return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
12477
12641
  }
12478
12642
 
@@ -12561,9 +12725,9 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
12561
12725
  * numbered parameters.
12562
12726
  */
12563
12727
  static bool
12564
- outer_scope_using_numbered_params_p(pm_parser_t *parser) {
12728
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12565
12729
  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
12566
- if (scope->numbered_params) return true;
12730
+ if (scope->numbered_parameters) return true;
12567
12731
  }
12568
12732
 
12569
12733
  return false;
@@ -12583,25 +12747,32 @@ parse_variable_call(pm_parser_t *parser) {
12583
12747
  }
12584
12748
 
12585
12749
  if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12586
- // Indicate that this scope is using numbered params so that child
12587
- // scopes cannot.
12588
- parser->current_scope->numbered_params = true;
12589
-
12590
12750
  // Now that we know we have a numbered parameter, we need to check
12591
12751
  // if it's allowed in this context. If it is, then we will create a
12592
12752
  // local variable read. If it's not, then we'll create a normal call
12593
12753
  // node but add an error.
12594
12754
  if (parser->current_scope->explicit_params) {
12595
12755
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12596
- } else if (outer_scope_using_numbered_params_p(parser)) {
12756
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
12597
12757
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12598
12758
  } else {
12759
+ // Indicate that this scope is using numbered params so that child
12760
+ // scopes cannot.
12761
+ uint8_t number = parser->previous.start[1];
12762
+
12763
+ // We subtract the value for the character '0' to get the actual
12764
+ // integer value of the number (only _1 through _9 are valid)
12765
+ uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
+ parser->current_scope->numbered_parameters = numbered_parameters;
12768
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
+ }
12770
+
12599
12771
  // When you use a numbered parameter, it implies the existence
12600
12772
  // of all of the locals that exist before it. For example,
12601
12773
  // referencing _2 means that _1 must exist. Therefore here we
12602
12774
  // loop through all of the possibilities and add them into the
12603
12775
  // constant pool.
12604
- uint8_t number = parser->previous.start[1];
12605
12776
  uint8_t current = '1';
12606
12777
  uint8_t *value;
12607
12778
 
@@ -12624,7 +12795,7 @@ parse_variable_call(pm_parser_t *parser) {
12624
12795
  }
12625
12796
 
12626
12797
  pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
12627
- node->base.flags |= flags;
12798
+ pm_node_flag_set((pm_node_t *)node, flags);
12628
12799
 
12629
12800
  return (pm_node_t *) node;
12630
12801
  }
@@ -12803,7 +12974,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12803
12974
  case PM_ARRAY_PATTERN_NODE: {
12804
12975
  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
12805
12976
 
12806
- if (pattern_node->constant == NULL) {
12977
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12807
12978
  pattern_node->base.location.start = node->location.start;
12808
12979
  pattern_node->base.location.end = closing.end;
12809
12980
 
@@ -12819,7 +12990,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12819
12990
  case PM_FIND_PATTERN_NODE: {
12820
12991
  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
12821
12992
 
12822
- if (pattern_node->constant == NULL) {
12993
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12823
12994
  pattern_node->base.location.start = node->location.start;
12824
12995
  pattern_node->base.location.end = closing.end;
12825
12996
 
@@ -12835,7 +13006,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12835
13006
  case PM_HASH_PATTERN_NODE: {
12836
13007
  pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
12837
13008
 
12838
- if (pattern_node->constant == NULL) {
13009
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12839
13010
  pattern_node->base.location.start = node->location.start;
12840
13011
  pattern_node->base.location.end = closing.end;
12841
13012
 
@@ -12951,10 +13122,15 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
12951
13122
  break;
12952
13123
  }
12953
13124
 
12954
- pm_node_t *assoc;
12955
-
12956
13125
  if (match1(parser, PM_TOKEN_USTAR_STAR)) {
12957
- assoc = parse_pattern_keyword_rest(parser);
13126
+ pm_node_t *assoc = parse_pattern_keyword_rest(parser);
13127
+
13128
+ if (rest == NULL) {
13129
+ rest = assoc;
13130
+ } else {
13131
+ pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13132
+ pm_node_list_append(&assocs, assoc);
13133
+ }
12958
13134
  } else {
12959
13135
  expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
12960
13136
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
@@ -12968,10 +13144,14 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
12968
13144
  }
12969
13145
 
12970
13146
  pm_token_t operator = not_provided(parser);
12971
- assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
12972
- }
13147
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
13148
+
13149
+ if (rest != NULL) {
13150
+ pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13151
+ }
12973
13152
 
12974
- pm_node_list_append(&assocs, assoc);
13153
+ pm_node_list_append(&assocs, assoc);
13154
+ }
12975
13155
  }
12976
13156
 
12977
13157
  pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
@@ -12989,8 +13169,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
12989
13169
  case PM_TOKEN_IDENTIFIER:
12990
13170
  case PM_TOKEN_METHOD_NAME: {
12991
13171
  parser_lex(parser);
12992
- pm_parser_local_add_token(parser, &parser->previous);
12993
- return (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous);
13172
+ pm_token_t name = parser->previous;
13173
+ int depth = pm_parser_local_depth(parser, &name);
13174
+ if (depth < 0) {
13175
+ depth = 0;
13176
+ pm_parser_local_add_token(parser, &name);
13177
+ }
13178
+ return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth);
12994
13179
  }
12995
13180
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
12996
13181
  pm_token_t opening = parser->current;
@@ -13077,7 +13262,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13077
13262
  first_assoc = parse_pattern_keyword_rest(parser);
13078
13263
  break;
13079
13264
  case PM_TOKEN_STRING_BEGIN: {
13080
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
13265
+ pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13081
13266
  pm_token_t operator = not_provided(parser);
13082
13267
 
13083
13268
  if (!pm_symbol_node_label_p(key)) {
@@ -13124,7 +13309,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13124
13309
  // expression as the right side of the range.
13125
13310
  switch (parser->current.type) {
13126
13311
  case PM_CASE_PRIMITIVE: {
13127
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13312
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13128
13313
  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13129
13314
  }
13130
13315
  default: {
@@ -13135,7 +13320,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13135
13320
  }
13136
13321
  }
13137
13322
  case PM_CASE_PRIMITIVE: {
13138
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, diag_id);
13323
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, diag_id);
13139
13324
 
13140
13325
  // Now that we have a primitive, we need to check if it's part of a range.
13141
13326
  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -13146,7 +13331,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13146
13331
  // node. Otherwise, we'll create an endless range.
13147
13332
  switch (parser->current.type) {
13148
13333
  case PM_CASE_PRIMITIVE: {
13149
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13334
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13150
13335
  return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
13151
13336
  }
13152
13337
  default:
@@ -13206,7 +13391,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13206
13391
  pm_token_t lparen = parser->current;
13207
13392
  parser_lex(parser);
13208
13393
 
13209
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13394
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13210
13395
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
13211
13396
 
13212
13397
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -13307,9 +13492,13 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13307
13492
 
13308
13493
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
13309
13494
  pm_token_t identifier = parser->previous;
13310
- pm_parser_local_add_token(parser, &identifier);
13495
+ int depth = pm_parser_local_depth(parser, &identifier);
13496
+ if (depth < 0) {
13497
+ depth = 0;
13498
+ pm_parser_local_add_token(parser, &identifier);
13499
+ }
13311
13500
 
13312
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier);
13501
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth);
13313
13502
  node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
13314
13503
  }
13315
13504
 
@@ -13370,6 +13559,8 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13370
13559
  while (accept1(parser, PM_TOKEN_COMMA)) {
13371
13560
  // Break early here in case we have a trailing comma.
13372
13561
  if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13562
+ node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13563
+ pm_node_list_append(&nodes, node);
13373
13564
  break;
13374
13565
  }
13375
13566
 
@@ -13460,13 +13651,15 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13460
13651
 
13461
13652
  // Here we have found a string literal. We'll parse it and add it to
13462
13653
  // the list of strings.
13463
- assert(parser->lex_modes.current->mode == PM_LEX_STRING);
13464
- bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
13654
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
13655
+ assert(lex_mode->mode == PM_LEX_STRING);
13656
+ bool lex_interpolation = lex_mode->as.string.interpolation;
13465
13657
 
13466
13658
  pm_token_t opening = parser->current;
13467
13659
  parser_lex(parser);
13468
13660
 
13469
- if (accept1(parser, PM_TOKEN_STRING_END)) {
13661
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13662
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13470
13663
  // If we get here, then we have an end immediately after a
13471
13664
  // start. In that case we'll create an empty content token and
13472
13665
  // return an uninterpolated string.
@@ -13489,15 +13682,16 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13489
13682
  // If we don't accept interpolation then we expect the string to
13490
13683
  // start with a single string content node.
13491
13684
  pm_string_t unescaped;
13685
+ pm_token_t content;
13492
13686
  if (match1(parser, PM_TOKEN_EOF)) {
13493
13687
  unescaped = PM_STRING_EMPTY;
13688
+ content = not_provided(parser);
13494
13689
  } else {
13495
13690
  unescaped = parser->current_string;
13691
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
13692
+ content = parser->previous;
13496
13693
  }
13497
13694
 
13498
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
13499
- pm_token_t content = parser->previous;
13500
-
13501
13695
  // It is unfortunately possible to have multiple string content
13502
13696
  // nodes in a row in the case that there's heredoc content in
13503
13697
  // the middle of the string, like this cursed example:
@@ -13526,6 +13720,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13526
13720
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13527
13721
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13528
13722
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13723
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13724
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13529
13726
  } else {
13530
13727
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13531
13728
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
@@ -13539,9 +13736,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13539
13736
  pm_string_t unescaped = parser->current_string;
13540
13737
  parser_lex(parser);
13541
13738
 
13542
- if (match1(parser, PM_TOKEN_STRING_END)) {
13739
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13543
13740
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13544
- parser_lex(parser);
13741
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13545
13743
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13546
13744
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13547
13745
  } else {
@@ -13552,6 +13750,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13552
13750
  pm_token_t string_closing = not_provided(parser);
13553
13751
 
13554
13752
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
13753
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
13555
13754
  pm_node_list_append(&parts, part);
13556
13755
 
13557
13756
  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
@@ -13562,6 +13761,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13562
13761
 
13563
13762
  if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13564
13763
  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13764
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13765
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13766
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13565
13767
  } else {
13566
13768
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13567
13769
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -13582,6 +13784,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13582
13784
 
13583
13785
  if (accept1(parser, PM_TOKEN_LABEL_END)) {
13584
13786
  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13787
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13788
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13789
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13585
13790
  } else {
13586
13791
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13587
13792
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -13629,7 +13834,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13629
13834
  * Parse an expression that begins with the previous node that we just lexed.
13630
13835
  */
13631
13836
  static inline pm_node_t *
13632
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13837
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
13633
13838
  switch (parser->current.type) {
13634
13839
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13635
13840
  parser_lex(parser);
@@ -13665,7 +13870,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13665
13870
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13666
13871
  }
13667
13872
  } else {
13668
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13873
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13669
13874
  }
13670
13875
 
13671
13876
  element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
@@ -13683,7 +13888,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13683
13888
 
13684
13889
  parsed_bare_hash = true;
13685
13890
  } else {
13686
- element = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_ARRAY_EXPRESSION);
13891
+ element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
13687
13892
 
13688
13893
  if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13689
13894
  if (parsed_bare_hash) {
@@ -13699,7 +13904,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13699
13904
  operator = not_provided(parser);
13700
13905
  }
13701
13906
 
13702
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
13907
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
13703
13908
  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
13704
13909
  pm_keyword_hash_node_elements_append(hash, assoc);
13705
13910
 
@@ -13740,7 +13945,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13740
13945
  // of statements within the parentheses.
13741
13946
  pm_accepts_block_stack_push(parser, true);
13742
13947
  context_push(parser, PM_CONTEXT_PARENS);
13743
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
13948
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13744
13949
  context_pop(parser);
13745
13950
 
13746
13951
  // Determine if this statement is followed by a terminator. In the
@@ -13816,7 +14021,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13816
14021
 
13817
14022
  // Parse each statement within the parentheses.
13818
14023
  while (true) {
13819
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
14024
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13820
14025
  pm_statements_node_body_append(statements, node);
13821
14026
 
13822
14027
  // If we're recovering from a syntax error, then we need to stop
@@ -13879,6 +14084,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13879
14084
 
13880
14085
  pm_token_t closing = not_provided(parser);
13881
14086
  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
14087
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
13882
14088
 
13883
14089
  // Characters can be followed by strings in which case they are
13884
14090
  // automatically concatenated.
@@ -13906,11 +14112,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13906
14112
  // fact a method call, not a constant read.
13907
14113
  if (
13908
14114
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
13909
- (binding_power <= PM_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14115
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
13910
14116
  (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
13911
14117
  ) {
13912
14118
  pm_arguments_t arguments = { 0 };
13913
- parse_arguments_list(parser, &arguments, true);
14119
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
13914
14120
  return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
13915
14121
  }
13916
14122
 
@@ -13944,7 +14150,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13944
14150
  pm_token_t operator = parser->current;
13945
14151
  parser_lex(parser);
13946
14152
 
13947
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
14153
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
13948
14154
  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13949
14155
  }
13950
14156
  case PM_TOKEN_FLOAT:
@@ -14003,10 +14209,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14003
14209
  pm_call_node_t *call = (pm_call_node_t *) node;
14004
14210
  pm_arguments_t arguments = { 0 };
14005
14211
 
14006
- if (parse_arguments_list(parser, &arguments, true)) {
14212
+ if (parse_arguments_list(parser, &arguments, true, accepts_command_call)) {
14007
14213
  // Since we found arguments, we need to turn off the
14008
14214
  // variable call bit in the flags.
14009
- call->base.flags &= (pm_node_flags_t) ~PM_CALL_NODE_FLAGS_VARIABLE_CALL;
14215
+ pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
14010
14216
 
14011
14217
  call->opening_loc = arguments.opening_loc;
14012
14218
  call->arguments = arguments.arguments;
@@ -14030,11 +14236,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14030
14236
  // can still be a method call if it is followed by arguments or
14031
14237
  // a block, so we need to check for that here.
14032
14238
  if (
14033
- (binding_power <= PM_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14239
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14034
14240
  (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14035
14241
  ) {
14036
14242
  pm_arguments_t arguments = { 0 };
14037
- parse_arguments_list(parser, &arguments, true);
14243
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
14038
14244
 
14039
14245
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
14040
14246
  pm_node_destroy(parser, node);
@@ -14065,7 +14271,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14065
14271
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14066
14272
  // If we get here, then we have an empty heredoc. We'll create
14067
14273
  // an empty content token and return an empty string node.
14068
- lex_state_set(parser, PM_LEX_STATE_END);
14274
+ lex_mode_pop(parser);
14069
14275
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14070
14276
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
14071
14277
 
@@ -14086,6 +14292,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14086
14292
  // content and we're at the end of the heredoc, so we can return
14087
14293
  // just a string node with the heredoc opening and closing as
14088
14294
  // its opening and closing.
14295
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
14089
14296
  pm_string_node_t *cast = (pm_string_node_t *) part;
14090
14297
 
14091
14298
  cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
@@ -14097,13 +14304,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14097
14304
  cast->base.type = PM_X_STRING_NODE;
14098
14305
  }
14099
14306
 
14100
- size_t common_whitespace = parser->current_string_common_whitespace;
14307
+ size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14101
14308
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14102
14309
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
14103
14310
  }
14104
14311
 
14105
14312
  node = (pm_node_t *) cast;
14106
- lex_state_set(parser, PM_LEX_STATE_END);
14313
+ lex_mode_pop(parser);
14107
14314
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14108
14315
  } else {
14109
14316
  // If we get here, then we have multiple parts in the heredoc,
@@ -14118,13 +14325,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14118
14325
  }
14119
14326
  }
14120
14327
 
14328
+ size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14329
+
14121
14330
  // Now that we have all of the parts, create the correct type of
14122
14331
  // interpolated node.
14123
14332
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14124
14333
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
14125
14334
  cast->parts = parts;
14126
14335
 
14127
- lex_state_set(parser, PM_LEX_STATE_END);
14336
+ lex_mode_pop(parser);
14128
14337
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14129
14338
 
14130
14339
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
@@ -14133,7 +14342,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14133
14342
  } else {
14134
14343
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
14135
14344
 
14136
- lex_state_set(parser, PM_LEX_STATE_END);
14345
+ lex_mode_pop(parser);
14137
14346
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14138
14347
 
14139
14348
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
@@ -14143,7 +14352,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14143
14352
 
14144
14353
  // If this is a heredoc that is indented with a ~, then we need
14145
14354
  // to dedent each line by the common leading whitespace.
14146
- size_t common_whitespace = parser->current_string_common_whitespace;
14147
14355
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14148
14356
  pm_node_list_t *nodes;
14149
14357
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -14202,6 +14410,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14202
14410
  parser_lex(parser);
14203
14411
  return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
14204
14412
  case PM_TOKEN_KEYWORD_ALIAS: {
14413
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
14414
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
14415
+ }
14416
+
14205
14417
  parser_lex(parser);
14206
14418
  pm_token_t keyword = parser->previous;
14207
14419
 
@@ -14246,7 +14458,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14246
14458
  } else if (!token_begins_expression_p(parser->current.type)) {
14247
14459
  predicate = NULL;
14248
14460
  } else {
14249
- predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14461
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14250
14462
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14251
14463
  }
14252
14464
 
@@ -14273,14 +14485,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14273
14485
  do {
14274
14486
  if (accept1(parser, PM_TOKEN_USTAR)) {
14275
14487
  pm_token_t operator = parser->previous;
14276
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14488
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14277
14489
 
14278
14490
  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
14279
14491
  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
14280
14492
 
14281
14493
  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
14282
14494
  } else {
14283
- pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14495
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14284
14496
  pm_when_node_conditions_append(when_node, condition);
14285
14497
 
14286
14498
  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
@@ -14337,11 +14549,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14337
14549
  // for guard clauses in the form of `if` or `unless` statements.
14338
14550
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14339
14551
  pm_token_t keyword = parser->previous;
14340
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
14552
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14341
14553
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14342
14554
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14343
14555
  pm_token_t keyword = parser->previous;
14344
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14556
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14345
14557
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14346
14558
  }
14347
14559
 
@@ -14426,7 +14638,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14426
14638
  }
14427
14639
 
14428
14640
  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
14429
- parse_rescues(parser, begin_node);
14641
+ parse_rescues(parser, begin_node, false);
14430
14642
 
14431
14643
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
14432
14644
  begin_node->base.location.end = parser->previous.end;
@@ -14439,6 +14651,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14439
14651
  return (pm_node_t *) begin_node;
14440
14652
  }
14441
14653
  case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
14654
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
14655
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
14656
+ }
14657
+
14442
14658
  parser_lex(parser);
14443
14659
  pm_token_t keyword = parser->previous;
14444
14660
 
@@ -14496,7 +14712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14496
14712
 
14497
14713
  pm_token_t keyword = parser->previous;
14498
14714
  pm_arguments_t arguments = { 0 };
14499
- parse_arguments_list(parser, &arguments, true);
14715
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
14500
14716
 
14501
14717
  if (
14502
14718
  arguments.opening_loc.start == NULL &&
@@ -14513,7 +14729,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14513
14729
 
14514
14730
  pm_token_t keyword = parser->previous;
14515
14731
  pm_arguments_t arguments = { 0 };
14516
- parse_arguments_list(parser, &arguments, false);
14732
+ parse_arguments_list(parser, &arguments, false, accepts_command_call);
14517
14733
 
14518
14734
  return (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
14519
14735
  }
@@ -14524,8 +14740,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14524
14740
 
14525
14741
  if (accept1(parser, PM_TOKEN_LESS_LESS)) {
14526
14742
  pm_token_t operator = parser->previous;
14527
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_NOT, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14743
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14528
14744
 
14745
+ pm_constant_id_t old_param_name = parser->current_param_name;
14746
+ parser->current_param_name = 0;
14529
14747
  pm_parser_scope_push(parser, true);
14530
14748
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14531
14749
 
@@ -14538,18 +14756,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14538
14756
 
14539
14757
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14540
14758
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14541
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
14759
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14542
14760
  }
14543
14761
 
14544
14762
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14545
14763
 
14546
14764
  pm_constant_id_list_t locals = parser->current_scope->locals;
14547
14765
  pm_parser_scope_pop(parser);
14766
+ parser->current_param_name = old_param_name;
14548
14767
  pm_do_loop_stack_pop(parser);
14549
14768
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14550
14769
  }
14551
14770
 
14552
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_CLASS_NAME);
14771
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_CLASS_NAME);
14553
14772
  pm_token_t name = parser->previous;
14554
14773
  if (name.type != PM_TOKEN_CONSTANT) {
14555
14774
  pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
@@ -14565,12 +14784,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14565
14784
  parser->command_start = true;
14566
14785
  parser_lex(parser);
14567
14786
 
14568
- superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14787
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CLASS_SUPERCLASS);
14569
14788
  } else {
14570
14789
  inheritance_operator = not_provided(parser);
14571
14790
  superclass = NULL;
14572
14791
  }
14573
14792
 
14793
+ pm_constant_id_t old_param_name = parser->current_param_name;
14794
+ parser->current_param_name = 0;
14574
14795
  pm_parser_scope_push(parser, true);
14575
14796
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14576
14797
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
@@ -14587,7 +14808,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14587
14808
 
14588
14809
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14589
14810
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14590
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
14811
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14591
14812
  }
14592
14813
 
14593
14814
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
@@ -14598,6 +14819,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14598
14819
 
14599
14820
  pm_constant_id_list_t locals = parser->current_scope->locals;
14600
14821
  pm_parser_scope_pop(parser);
14822
+ parser->current_param_name = old_param_name;
14601
14823
  pm_do_loop_stack_pop(parser);
14602
14824
 
14603
14825
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
@@ -14613,12 +14835,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14613
14835
  pm_token_t operator = not_provided(parser);
14614
14836
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14615
14837
 
14838
+ // This context is necessary for lexing `...` in a bare params correctly.
14839
+ // It must be pushed before lexing the first param, so it is here.
14616
14840
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
14617
14841
  parser_lex(parser);
14842
+ pm_constant_id_t old_param_name = parser->current_param_name;
14618
14843
 
14619
14844
  switch (parser->current.type) {
14620
14845
  case PM_CASE_OPERATOR:
14621
14846
  pm_parser_scope_push(parser, true);
14847
+ parser->current_param_name = 0;
14622
14848
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14623
14849
  parser_lex(parser);
14624
14850
  name = parser->previous;
@@ -14630,6 +14856,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14630
14856
  receiver = parse_variable_call(parser);
14631
14857
 
14632
14858
  pm_parser_scope_push(parser, true);
14859
+ parser->current_param_name = 0;
14633
14860
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14634
14861
  parser_lex(parser);
14635
14862
 
@@ -14638,6 +14865,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14638
14865
  } else {
14639
14866
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14640
14867
  pm_parser_scope_push(parser, true);
14868
+ parser->current_param_name = 0;
14641
14869
  name = parser->previous;
14642
14870
  }
14643
14871
 
@@ -14655,6 +14883,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14655
14883
  case PM_TOKEN_KEYWORD___LINE__:
14656
14884
  case PM_TOKEN_KEYWORD___ENCODING__: {
14657
14885
  pm_parser_scope_push(parser, true);
14886
+ parser->current_param_name = 0;
14658
14887
  parser_lex(parser);
14659
14888
  pm_token_t identifier = parser->previous;
14660
14889
 
@@ -14708,9 +14937,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14708
14937
  break;
14709
14938
  }
14710
14939
  case PM_TOKEN_PARENTHESIS_LEFT: {
14940
+ // The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner expression
14941
+ // of this parenthesis should not be processed under this context.
14942
+ // Thus, the context is popped here.
14943
+ context_pop(parser);
14711
14944
  parser_lex(parser);
14945
+
14712
14946
  pm_token_t lparen = parser->previous;
14713
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14947
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14714
14948
 
14715
14949
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14716
14950
  pm_token_t rparen = parser->previous;
@@ -14722,11 +14956,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14722
14956
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14723
14957
 
14724
14958
  pm_parser_scope_push(parser, true);
14959
+ parser->current_param_name = 0;
14960
+
14961
+ // To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
14962
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
14725
14963
  name = parse_method_definition_name(parser);
14726
14964
  break;
14727
14965
  }
14728
14966
  default:
14729
14967
  pm_parser_scope_push(parser, true);
14968
+ parser->current_param_name = 0;
14730
14969
  name = parse_method_definition_name(parser);
14731
14970
  break;
14732
14971
  }
@@ -14779,6 +15018,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14779
15018
  }
14780
15019
  }
14781
15020
 
15021
+ uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
+
14782
15023
  context_pop(parser);
14783
15024
  pm_node_t *statements = NULL;
14784
15025
  pm_token_t equal;
@@ -14794,11 +15035,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14794
15035
  pm_do_loop_stack_push(parser, false);
14795
15036
  statements = (pm_node_t *) pm_statements_node_create(parser);
14796
15037
 
14797
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, PM_ERR_DEF_ENDLESS);
15038
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, PM_ERR_DEF_ENDLESS);
14798
15039
 
14799
15040
  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
14800
15041
  pm_token_t rescue_keyword = parser->previous;
14801
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15042
+ pm_node_t *value = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
14802
15043
  pm_rescue_modifier_node_t *rescue_node = pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
14803
15044
  statement = (pm_node_t *)rescue_node;
14804
15045
  }
@@ -14829,7 +15070,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14829
15070
 
14830
15071
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14831
15072
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14832
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
15073
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
14833
15074
  }
14834
15075
 
14835
15076
  pm_accepts_block_stack_pop(parser);
@@ -14839,6 +15080,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14839
15080
  }
14840
15081
 
14841
15082
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
+ parser->current_param_name = old_param_name;
14842
15084
  pm_parser_scope_pop(parser);
14843
15085
 
14844
15086
  return (pm_node_t *) pm_def_node_create(
@@ -14848,6 +15090,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14848
15090
  params,
14849
15091
  statements,
14850
15092
  &locals,
15093
+ locals_body_index,
14851
15094
  &def_keyword,
14852
15095
  &operator,
14853
15096
  &lparen,
@@ -14866,18 +15109,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14866
15109
 
14867
15110
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14868
15111
  lparen = parser->previous;
14869
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_DEFINED_EXPRESSION);
15112
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_DEFINED_EXPRESSION);
14870
15113
 
14871
15114
  if (parser->recovering) {
14872
15115
  rparen = not_provided(parser);
14873
15116
  } else {
15117
+ accept1(parser, PM_TOKEN_NEWLINE);
14874
15118
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14875
15119
  rparen = parser->previous;
14876
15120
  }
14877
15121
  } else {
14878
15122
  lparen = not_provided(parser);
14879
15123
  rparen = not_provided(parser);
14880
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_DEFINED_EXPRESSION);
15124
+ expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_DEFINED_EXPRESSION);
14881
15125
  }
14882
15126
 
14883
15127
  return (pm_node_t *) pm_defined_node_create(
@@ -14889,6 +15133,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14889
15133
  );
14890
15134
  }
14891
15135
  case PM_TOKEN_KEYWORD_END_UPCASE: {
15136
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
15137
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
15138
+ }
15139
+
14892
15140
  parser_lex(parser);
14893
15141
  pm_token_t keyword = parser->previous;
14894
15142
 
@@ -14911,7 +15159,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14911
15159
  pm_token_t for_keyword = parser->previous;
14912
15160
  pm_node_t *index;
14913
15161
 
14914
- pm_parser_scope_push_transparent(parser);
14915
15162
  context_push(parser, PM_CONTEXT_FOR_INDEX);
14916
15163
 
14917
15164
  // First, parse out the first index expression.
@@ -14920,12 +15167,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14920
15167
  pm_node_t *name = NULL;
14921
15168
 
14922
15169
  if (token_begins_expression_p(parser->current.type)) {
14923
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15170
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14924
15171
  }
14925
15172
 
14926
15173
  index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
14927
15174
  } else if (token_begins_expression_p(parser->current.type)) {
14928
- index = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
15175
+ index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
14929
15176
  } else {
14930
15177
  pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
14931
15178
  index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
@@ -14939,13 +15186,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14939
15186
  }
14940
15187
 
14941
15188
  context_pop(parser);
14942
- pm_parser_scope_pop(parser);
14943
15189
  pm_do_loop_stack_push(parser, true);
14944
15190
 
14945
15191
  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
14946
15192
  pm_token_t in_keyword = parser->previous;
14947
15193
 
14948
- pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
15194
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_FOR_COLLECTION);
14949
15195
  pm_do_loop_stack_pop(parser);
14950
15196
 
14951
15197
  pm_token_t do_keyword;
@@ -14959,10 +15205,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14959
15205
  pm_statements_node_t *statements = NULL;
14960
15206
 
14961
15207
  if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
14962
- pm_parser_scope_push_transparent(parser);
14963
15208
  statements = parse_statements(parser, PM_CONTEXT_FOR);
14964
15209
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
14965
- pm_parser_scope_pop(parser);
14966
15210
  }
14967
15211
 
14968
15212
  return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
@@ -14971,6 +15215,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14971
15215
  parser_lex(parser);
14972
15216
  return parse_conditional(parser, PM_CONTEXT_IF);
14973
15217
  case PM_TOKEN_KEYWORD_UNDEF: {
15218
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
15219
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
15220
+ }
15221
+
14974
15222
  parser_lex(parser);
14975
15223
  pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
14976
15224
  pm_node_t *name = parse_undef_argument(parser);
@@ -15011,7 +15259,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15011
15259
  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15012
15260
  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15013
15261
  } else {
15014
- receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_NOT_EXPRESSION);
15262
+ receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_NOT_EXPRESSION);
15015
15263
  pm_conditional_predicate(receiver);
15016
15264
 
15017
15265
  if (!parser->recovering) {
@@ -15021,7 +15269,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15021
15269
  }
15022
15270
  }
15023
15271
  } else {
15024
- receiver = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_NOT_EXPRESSION);
15272
+ receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_NOT_EXPRESSION);
15025
15273
  pm_conditional_predicate(receiver);
15026
15274
  }
15027
15275
 
@@ -15034,7 +15282,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15034
15282
  parser_lex(parser);
15035
15283
 
15036
15284
  pm_token_t module_keyword = parser->previous;
15037
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_MODULE_NAME);
15285
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_MODULE_NAME);
15038
15286
  pm_token_t name;
15039
15287
 
15040
15288
  // If we can recover from a syntax error that occurred while parsing
@@ -15061,6 +15309,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15061
15309
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15062
15310
  }
15063
15311
 
15312
+ pm_constant_id_t old_param_name = parser->current_param_name;
15313
+ parser->current_param_name = 0;
15064
15314
  pm_parser_scope_push(parser, true);
15065
15315
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15066
15316
  pm_node_t *statements = NULL;
@@ -15073,11 +15323,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15073
15323
 
15074
15324
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15075
15325
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15076
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
15326
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
15077
15327
  }
15078
15328
 
15079
15329
  pm_constant_id_list_t locals = parser->current_scope->locals;
15080
15330
  pm_parser_scope_pop(parser);
15331
+ parser->current_param_name = old_param_name;
15081
15332
 
15082
15333
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15083
15334
 
@@ -15107,7 +15358,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15107
15358
  parser_lex(parser);
15108
15359
  pm_token_t keyword = parser->previous;
15109
15360
 
15110
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15361
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15111
15362
  pm_do_loop_stack_pop(parser);
15112
15363
 
15113
15364
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
@@ -15128,7 +15379,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15128
15379
  parser_lex(parser);
15129
15380
  pm_token_t keyword = parser->previous;
15130
15381
 
15131
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15382
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15132
15383
  pm_do_loop_stack_pop(parser);
15133
15384
 
15134
15385
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
@@ -15146,7 +15397,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15146
15397
  }
15147
15398
  case PM_TOKEN_PERCENT_LOWER_I: {
15148
15399
  parser_lex(parser);
15149
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15400
+ pm_token_t opening = parser->previous;
15401
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15150
15402
 
15151
15403
  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15152
15404
  accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15161,14 +15413,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15161
15413
  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
15162
15414
  }
15163
15415
 
15164
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
15165
- pm_array_node_close_set(array, &parser->previous);
15416
+ pm_token_t closing = parser->current;
15417
+ if (match1(parser, PM_TOKEN_EOF)) {
15418
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
15419
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15420
+ } else {
15421
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
15422
+ }
15423
+ pm_array_node_close_set(array, &closing);
15166
15424
 
15167
15425
  return (pm_node_t *) array;
15168
15426
  }
15169
15427
  case PM_TOKEN_PERCENT_UPPER_I: {
15170
15428
  parser_lex(parser);
15171
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15429
+ pm_token_t opening = parser->previous;
15430
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15172
15431
 
15173
15432
  // This is the current node that we are parsing that will be added to the
15174
15433
  // list of elements.
@@ -15308,14 +15567,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15308
15567
  pm_array_node_elements_append(array, current);
15309
15568
  }
15310
15569
 
15311
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
15312
- pm_array_node_close_set(array, &parser->previous);
15570
+ pm_token_t closing = parser->current;
15571
+ if (match1(parser, PM_TOKEN_EOF)) {
15572
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
15573
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15574
+ } else {
15575
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
15576
+ }
15577
+ pm_array_node_close_set(array, &closing);
15313
15578
 
15314
15579
  return (pm_node_t *) array;
15315
15580
  }
15316
15581
  case PM_TOKEN_PERCENT_LOWER_W: {
15317
15582
  parser_lex(parser);
15318
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15583
+ pm_token_t opening = parser->previous;
15584
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15319
15585
 
15320
15586
  // skip all leading whitespaces
15321
15587
  accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15335,28 +15601,40 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15335
15601
  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
15336
15602
  }
15337
15603
 
15338
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
15339
- pm_array_node_close_set(array, &parser->previous);
15604
+ pm_token_t closing = parser->current;
15605
+ if (match1(parser, PM_TOKEN_EOF)) {
15606
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
15607
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15608
+ } else {
15609
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
15610
+ }
15340
15611
 
15612
+ pm_array_node_close_set(array, &closing);
15341
15613
  return (pm_node_t *) array;
15342
15614
  }
15343
15615
  case PM_TOKEN_PERCENT_UPPER_W: {
15344
15616
  parser_lex(parser);
15345
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15617
+ pm_token_t opening = parser->previous;
15618
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15346
15619
 
15347
- // This is the current node that we are parsing that will be added to the
15348
- // list of elements.
15620
+ // This is the current node that we are parsing that will be added
15621
+ // to the list of elements.
15349
15622
  pm_node_t *current = NULL;
15350
15623
 
15351
15624
  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15352
15625
  switch (parser->current.type) {
15353
15626
  case PM_TOKEN_WORDS_SEP: {
15627
+ // Reset the explicit encoding if we hit a separator
15628
+ // since each element can have its own encoding.
15629
+ parser->explicit_encoding = NULL;
15630
+
15354
15631
  if (current == NULL) {
15355
- // If we hit a separator before we have any content, then we don't
15356
- // need to do anything.
15632
+ // If we hit a separator before we have any content,
15633
+ // then we don't need to do anything.
15357
15634
  } else {
15358
- // If we hit a separator after we've hit content, then we need to
15359
- // append that content to the list and reset the current node.
15635
+ // If we hit a separator after we've hit content,
15636
+ // then we need to append that content to the list
15637
+ // and reset the current node.
15360
15638
  pm_array_node_elements_append(array, current);
15361
15639
  current = NULL;
15362
15640
  }
@@ -15369,22 +15647,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15369
15647
  pm_token_t closing = not_provided(parser);
15370
15648
 
15371
15649
  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
15650
+ pm_node_flag_set(string, parse_unescaped_encoding(parser));
15372
15651
  parser_lex(parser);
15373
15652
 
15374
15653
  if (current == NULL) {
15375
- // If we hit content and the current node is NULL, then this is
15376
- // the first string content we've seen. In that case we're going
15377
- // to create a new string node and set that to the current.
15654
+ // If we hit content and the current node is NULL,
15655
+ // then this is the first string content we've seen.
15656
+ // In that case we're going to create a new string
15657
+ // node and set that to the current.
15378
15658
  current = string;
15379
15659
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15380
- // If we hit string content and the current node is an
15381
- // interpolated string, then we need to append the string content
15382
- // to the list of child nodes.
15660
+ // If we hit string content and the current node is
15661
+ // an interpolated string, then we need to append
15662
+ // the string content to the list of child nodes.
15383
15663
  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
15384
15664
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15385
- // If we hit string content and the current node is a string node,
15386
- // then we need to convert the current node into an interpolated
15387
- // string and add the string content to the list of child nodes.
15665
+ // If we hit string content and the current node is
15666
+ // a string node, then we need to convert the
15667
+ // current node into an interpolated string and add
15668
+ // the string content to the list of child nodes.
15388
15669
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15389
15670
  pm_interpolated_string_node_append(interpolated, current);
15390
15671
  pm_interpolated_string_node_append(interpolated, string);
@@ -15397,24 +15678,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15397
15678
  }
15398
15679
  case PM_TOKEN_EMBVAR: {
15399
15680
  if (current == NULL) {
15400
- // If we hit an embedded variable and the current node is NULL,
15401
- // then this is the start of a new string. We'll set the current
15402
- // node to a new interpolated string.
15681
+ // If we hit an embedded variable and the current
15682
+ // node is NULL, then this is the start of a new
15683
+ // string. We'll set the current node to a new
15684
+ // interpolated string.
15403
15685
  pm_token_t opening = not_provided(parser);
15404
15686
  pm_token_t closing = not_provided(parser);
15405
15687
  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15406
15688
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15407
- // If we hit an embedded variable and the current node is a string
15408
- // node, then we'll convert the current into an interpolated
15409
- // string and add the string node to the list of parts.
15689
+ // If we hit an embedded variable and the current
15690
+ // node is a string node, then we'll convert the
15691
+ // current into an interpolated string and add the
15692
+ // string node to the list of parts.
15410
15693
  pm_token_t opening = not_provided(parser);
15411
15694
  pm_token_t closing = not_provided(parser);
15412
15695
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15413
15696
  pm_interpolated_string_node_append(interpolated, current);
15414
15697
  current = (pm_node_t *) interpolated;
15415
15698
  } else {
15416
- // If we hit an embedded variable and the current node is an
15417
- // interpolated string, then we'll just add the embedded variable.
15699
+ // If we hit an embedded variable and the current
15700
+ // node is an interpolated string, then we'll just
15701
+ // add the embedded variable.
15418
15702
  }
15419
15703
 
15420
15704
  pm_node_t *part = parse_string_part(parser);
@@ -15423,25 +15707,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15423
15707
  }
15424
15708
  case PM_TOKEN_EMBEXPR_BEGIN: {
15425
15709
  if (current == NULL) {
15426
- // If we hit an embedded expression and the current node is NULL,
15427
- // then this is the start of a new string. We'll set the current
15428
- // node to a new interpolated string.
15710
+ // If we hit an embedded expression and the current
15711
+ // node is NULL, then this is the start of a new
15712
+ // string. We'll set the current node to a new
15713
+ // interpolated string.
15429
15714
  pm_token_t opening = not_provided(parser);
15430
15715
  pm_token_t closing = not_provided(parser);
15431
15716
  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15432
15717
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15433
- // If we hit an embedded expression and the current node is a
15434
- // string node, then we'll convert the current into an
15435
- // interpolated string and add the string node to the list of
15436
- // parts.
15718
+ // If we hit an embedded expression and the current
15719
+ // node is a string node, then we'll convert the
15720
+ // current into an interpolated string and add the
15721
+ // string node to the list of parts.
15437
15722
  pm_token_t opening = not_provided(parser);
15438
15723
  pm_token_t closing = not_provided(parser);
15439
15724
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15440
15725
  pm_interpolated_string_node_append(interpolated, current);
15441
15726
  current = (pm_node_t *) interpolated;
15442
15727
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15443
- // If we hit an embedded expression and the current node is an
15444
- // interpolated string, then we'll just continue on.
15728
+ // If we hit an embedded expression and the current
15729
+ // node is an interpolated string, then we'll just
15730
+ // continue on.
15445
15731
  } else {
15446
15732
  assert(false && "unreachable");
15447
15733
  }
@@ -15462,9 +15748,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15462
15748
  pm_array_node_elements_append(array, current);
15463
15749
  }
15464
15750
 
15465
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
15466
- pm_array_node_close_set(array, &parser->previous);
15751
+ pm_token_t closing = parser->current;
15752
+ if (match1(parser, PM_TOKEN_EOF)) {
15753
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
15754
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15755
+ } else {
15756
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
15757
+ }
15467
15758
 
15759
+ pm_array_node_close_set(array, &closing);
15468
15760
  return (pm_node_t *) array;
15469
15761
  }
15470
15762
  case PM_TOKEN_REGEXP_BEGIN: {
@@ -15527,8 +15819,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15527
15819
  }
15528
15820
  }
15529
15821
 
15530
- expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
15531
- pm_interpolated_regular_expression_node_closing_set(node, &parser->previous);
15822
+ pm_token_t closing = parser->current;
15823
+ if (match1(parser, PM_TOKEN_EOF)) {
15824
+ pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
15825
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15826
+ } else {
15827
+ expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
15828
+ }
15829
+ pm_interpolated_regular_expression_node_closing_set(node, &closing);
15532
15830
 
15533
15831
  return (pm_node_t *) node;
15534
15832
  }
@@ -15566,8 +15864,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15566
15864
  pm_token_t content = parser->current;
15567
15865
  parser_lex(parser);
15568
15866
 
15569
- if (accept1(parser, PM_TOKEN_STRING_END)) {
15570
- return (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
15867
+ if (match1(parser, PM_TOKEN_STRING_END)) {
15868
+ pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
15869
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
15870
+ parser_lex(parser);
15871
+ return node;
15571
15872
  }
15572
15873
 
15573
15874
  // If we get here, then we have interpolation so we'll need to
@@ -15576,7 +15877,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15576
15877
 
15577
15878
  pm_token_t opening = not_provided(parser);
15578
15879
  pm_token_t closing = not_provided(parser);
15880
+
15579
15881
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
15882
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
15580
15883
 
15581
15884
  pm_interpolated_xstring_node_append(node, part);
15582
15885
  } else {
@@ -15593,8 +15896,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15593
15896
  }
15594
15897
  }
15595
15898
 
15596
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
15597
- pm_interpolated_xstring_node_closing_set(node, &parser->previous);
15899
+ pm_token_t closing = parser->current;
15900
+ if (match1(parser, PM_TOKEN_EOF)) {
15901
+ pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
15902
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15903
+ } else {
15904
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
15905
+ }
15906
+ pm_interpolated_xstring_node_closing_set(node, &closing);
15907
+
15598
15908
  return (pm_node_t *) node;
15599
15909
  }
15600
15910
  case PM_TOKEN_USTAR: {
@@ -15611,7 +15921,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15611
15921
  pm_node_t *name = NULL;
15612
15922
 
15613
15923
  if (token_begins_expression_p(parser->current.type)) {
15614
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15924
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15615
15925
  }
15616
15926
 
15617
15927
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
@@ -15626,7 +15936,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15626
15936
  parser_lex(parser);
15627
15937
 
15628
15938
  pm_token_t operator = parser->previous;
15629
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_BANG);
15939
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
15630
15940
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
15631
15941
 
15632
15942
  pm_conditional_predicate(receiver);
@@ -15636,7 +15946,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15636
15946
  parser_lex(parser);
15637
15947
 
15638
15948
  pm_token_t operator = parser->previous;
15639
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_TILDE);
15949
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
15640
15950
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
15641
15951
 
15642
15952
  return (pm_node_t *) node;
@@ -15645,7 +15955,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15645
15955
  parser_lex(parser);
15646
15956
 
15647
15957
  pm_token_t operator = parser->previous;
15648
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_MINUS);
15958
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15649
15959
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
15650
15960
 
15651
15961
  return (pm_node_t *) node;
@@ -15654,11 +15964,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15654
15964
  parser_lex(parser);
15655
15965
 
15656
15966
  pm_token_t operator = parser->previous;
15657
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_MINUS);
15967
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15658
15968
 
15659
15969
  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
15660
15970
  pm_token_t exponent_operator = parser->previous;
15661
- pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, PM_ERR_EXPECT_ARGUMENT);
15971
+ pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, PM_ERR_EXPECT_ARGUMENT);
15662
15972
  node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent);
15663
15973
  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
15664
15974
  } else {
@@ -15686,7 +15996,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15686
15996
 
15687
15997
  pm_token_t operator = parser->previous;
15688
15998
  pm_parser_scope_push(parser, false);
15689
- pm_block_parameters_node_t *params;
15999
+ pm_block_parameters_node_t *block_parameters;
15690
16000
 
15691
16001
  switch (parser->current.type) {
15692
16002
  case PM_TOKEN_PARENTHESIS_LEFT: {
@@ -15695,31 +16005,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15695
16005
  parser_lex(parser);
15696
16006
 
15697
16007
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15698
- params = pm_block_parameters_node_create(parser, NULL, &opening);
16008
+ block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
15699
16009
  } else {
15700
- params = parse_block_parameters(parser, false, &opening, true);
16010
+ block_parameters = parse_block_parameters(parser, false, &opening, true);
15701
16011
  }
15702
16012
 
15703
16013
  accept1(parser, PM_TOKEN_NEWLINE);
15704
16014
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
15705
16015
 
15706
- pm_block_parameters_node_closing_set(params, &parser->previous);
16016
+ pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15707
16017
  break;
15708
16018
  }
15709
16019
  case PM_CASE_PARAMETER: {
15710
16020
  parser->current_scope->explicit_params = true;
15711
16021
  pm_accepts_block_stack_push(parser, false);
15712
16022
  pm_token_t opening = not_provided(parser);
15713
- params = parse_block_parameters(parser, false, &opening, true);
16023
+ block_parameters = parse_block_parameters(parser, false, &opening, true);
15714
16024
  pm_accepts_block_stack_pop(parser);
15715
16025
  break;
15716
16026
  }
15717
16027
  default: {
15718
- params = NULL;
16028
+ block_parameters = NULL;
15719
16029
  break;
15720
16030
  }
15721
16031
  }
15722
16032
 
16033
+ uint32_t locals_body_index = 0;
16034
+
16035
+ if (block_parameters) {
16036
+ locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
+ }
16038
+
15723
16039
  pm_token_t opening;
15724
16040
  pm_node_t *body = NULL;
15725
16041
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -15743,22 +16059,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15743
16059
 
15744
16060
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15745
16061
  assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
15746
- body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body);
16062
+ body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
15747
16063
  }
15748
16064
 
15749
16065
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
15750
16066
  }
15751
16067
 
16068
+ pm_node_t *parameters = (pm_node_t *) block_parameters;
16069
+ uint8_t maximum = parser->current_scope->numbered_parameters;
16070
+
16071
+ if (parameters == NULL && (maximum > 0)) {
16072
+ parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
+ locals_body_index = maximum;
16074
+ }
16075
+
15752
16076
  pm_constant_id_list_t locals = parser->current_scope->locals;
15753
16077
  pm_parser_scope_pop(parser);
15754
16078
  pm_accepts_block_stack_pop(parser);
15755
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
16079
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
15756
16080
  }
15757
16081
  case PM_TOKEN_UPLUS: {
15758
16082
  parser_lex(parser);
15759
16083
 
15760
16084
  pm_token_t operator = parser->previous;
15761
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_PLUS);
16085
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
15762
16086
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
15763
16087
 
15764
16088
  return (pm_node_t *) node;
@@ -15781,14 +16105,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15781
16105
  }
15782
16106
 
15783
16107
  static inline pm_node_t *
15784
- parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15785
- pm_node_t *value = parse_value_expression(parser, binding_power, diag_id);
16108
+ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16109
+ pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
15786
16110
 
15787
16111
  // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15788
16112
  if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15789
16113
  pm_token_t rescue = parser->current;
15790
16114
  parser_lex(parser);
15791
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16115
+ pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
15792
16116
 
15793
16117
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15794
16118
  }
@@ -15798,8 +16122,8 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm
15798
16122
 
15799
16123
 
15800
16124
  static inline pm_node_t *
15801
- parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15802
- pm_node_t *value = parse_starred_expression(parser, binding_power, diag_id);
16125
+ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16126
+ pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
15803
16127
 
15804
16128
  bool is_single_value = true;
15805
16129
  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
@@ -15811,7 +16135,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
15811
16135
  value = (pm_node_t *) array;
15812
16136
 
15813
16137
  while (accept1(parser, PM_TOKEN_COMMA)) {
15814
- pm_node_t *element = parse_starred_expression(parser, binding_power, PM_ERR_ARRAY_ELEMENT);
16138
+ pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT);
15815
16139
  pm_array_node_elements_append(array, element);
15816
16140
  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
15817
16141
  }
@@ -15821,7 +16145,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
15821
16145
  if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15822
16146
  pm_token_t rescue = parser->current;
15823
16147
  parser_lex(parser);
15824
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16148
+ pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
15825
16149
 
15826
16150
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15827
16151
  }
@@ -15879,7 +16203,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15879
16203
  pm_string_list_t named_captures = { 0 };
15880
16204
  pm_node_t *result;
15881
16205
 
15882
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
16206
+ if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
15883
16207
  // Since we should not create a MatchWriteNode when all capture names
15884
16208
  // are invalid, creating a MatchWriteNode is delayed here.
15885
16209
  pm_match_write_node_t *match = NULL;
@@ -15913,6 +16237,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15913
16237
  if (memory == NULL) abort();
15914
16238
 
15915
16239
  memcpy(memory, source, length);
16240
+ // This silences the clang analyzer warning about a leak of the memory pointed to by `memory`.
16241
+ // NOLINTNEXTLINE(clang-analyzer-*)
15916
16242
  name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
15917
16243
 
15918
16244
  if (pm_token_is_numbered_parameter(source, source + length)) {
@@ -15960,7 +16286,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15960
16286
  }
15961
16287
 
15962
16288
  static inline pm_node_t *
15963
- parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
16289
+ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call) {
15964
16290
  pm_token_t token = parser->current;
15965
16291
 
15966
16292
  switch (token.type) {
@@ -15979,7 +16305,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15979
16305
  /* fallthrough */
15980
16306
  case PM_CASE_WRITABLE: {
15981
16307
  parser_lex(parser);
15982
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16308
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15983
16309
  return parse_write(parser, node, &token, value);
15984
16310
  }
15985
16311
  case PM_SPLAT_NODE: {
@@ -15987,7 +16313,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15987
16313
  pm_multi_target_node_targets_append(parser, multi_target, node);
15988
16314
 
15989
16315
  parser_lex(parser);
15990
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16316
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15991
16317
  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
15992
16318
  }
15993
16319
  default:
@@ -16009,7 +16335,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16009
16335
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16010
16336
  parser_lex(parser);
16011
16337
 
16012
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16338
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16013
16339
  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
16014
16340
 
16015
16341
  pm_node_destroy(parser, node);
@@ -16018,7 +16344,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16018
16344
  case PM_CLASS_VARIABLE_READ_NODE: {
16019
16345
  parser_lex(parser);
16020
16346
 
16021
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16347
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16022
16348
  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16023
16349
 
16024
16350
  pm_node_destroy(parser, node);
@@ -16027,13 +16353,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16027
16353
  case PM_CONSTANT_PATH_NODE: {
16028
16354
  parser_lex(parser);
16029
16355
 
16030
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16356
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16031
16357
  return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16032
16358
  }
16033
16359
  case PM_CONSTANT_READ_NODE: {
16034
16360
  parser_lex(parser);
16035
16361
 
16036
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16362
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16037
16363
  pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16038
16364
 
16039
16365
  pm_node_destroy(parser, node);
@@ -16042,7 +16368,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16042
16368
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16043
16369
  parser_lex(parser);
16044
16370
 
16045
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16371
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16046
16372
  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16047
16373
 
16048
16374
  pm_node_destroy(parser, node);
@@ -16052,7 +16378,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16052
16378
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16053
16379
  parser_lex(parser);
16054
16380
 
16055
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16381
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16056
16382
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16057
16383
 
16058
16384
  pm_node_destroy(parser, node);
@@ -16070,7 +16396,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16070
16396
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16071
16397
 
16072
16398
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16073
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16399
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16074
16400
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16075
16401
 
16076
16402
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16081,7 +16407,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16081
16407
  // this is an aref expression, and we can transform it into
16082
16408
  // an aset expression.
16083
16409
  if (pm_call_node_index_p(cast)) {
16084
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16410
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16085
16411
  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
16086
16412
  }
16087
16413
 
@@ -16093,7 +16419,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16093
16419
  }
16094
16420
 
16095
16421
  parse_call_operator_write(parser, cast, &token);
16096
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16422
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16097
16423
  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
16098
16424
  }
16099
16425
  case PM_MULTI_WRITE_NODE: {
@@ -16120,7 +16446,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16120
16446
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16121
16447
  parser_lex(parser);
16122
16448
 
16123
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16449
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16124
16450
  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
16125
16451
 
16126
16452
  pm_node_destroy(parser, node);
@@ -16129,7 +16455,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16129
16455
  case PM_CLASS_VARIABLE_READ_NODE: {
16130
16456
  parser_lex(parser);
16131
16457
 
16132
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16458
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16133
16459
  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16134
16460
 
16135
16461
  pm_node_destroy(parser, node);
@@ -16138,13 +16464,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16138
16464
  case PM_CONSTANT_PATH_NODE: {
16139
16465
  parser_lex(parser);
16140
16466
 
16141
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16467
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16142
16468
  return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16143
16469
  }
16144
16470
  case PM_CONSTANT_READ_NODE: {
16145
16471
  parser_lex(parser);
16146
16472
 
16147
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16473
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16148
16474
  pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16149
16475
 
16150
16476
  pm_node_destroy(parser, node);
@@ -16153,7 +16479,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16153
16479
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16154
16480
  parser_lex(parser);
16155
16481
 
16156
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16482
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16157
16483
  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16158
16484
 
16159
16485
  pm_node_destroy(parser, node);
@@ -16163,7 +16489,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16163
16489
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16164
16490
  parser_lex(parser);
16165
16491
 
16166
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16492
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16167
16493
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16168
16494
 
16169
16495
  pm_node_destroy(parser, node);
@@ -16181,7 +16507,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16181
16507
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16182
16508
 
16183
16509
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16184
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16510
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16185
16511
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16186
16512
 
16187
16513
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16192,7 +16518,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16192
16518
  // this is an aref expression, and we can transform it into
16193
16519
  // an aset expression.
16194
16520
  if (pm_call_node_index_p(cast)) {
16195
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16521
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16196
16522
  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
16197
16523
  }
16198
16524
 
@@ -16204,7 +16530,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16204
16530
  }
16205
16531
 
16206
16532
  parse_call_operator_write(parser, cast, &token);
16207
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16533
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16208
16534
  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
16209
16535
  }
16210
16536
  case PM_MULTI_WRITE_NODE: {
@@ -16241,7 +16567,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16241
16567
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16242
16568
  parser_lex(parser);
16243
16569
 
16244
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16570
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16245
16571
  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
16246
16572
 
16247
16573
  pm_node_destroy(parser, node);
@@ -16250,7 +16576,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16250
16576
  case PM_CLASS_VARIABLE_READ_NODE: {
16251
16577
  parser_lex(parser);
16252
16578
 
16253
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16579
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16254
16580
  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16255
16581
 
16256
16582
  pm_node_destroy(parser, node);
@@ -16259,13 +16585,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16259
16585
  case PM_CONSTANT_PATH_NODE: {
16260
16586
  parser_lex(parser);
16261
16587
 
16262
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16588
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16263
16589
  return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16264
16590
  }
16265
16591
  case PM_CONSTANT_READ_NODE: {
16266
16592
  parser_lex(parser);
16267
16593
 
16268
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16594
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16269
16595
  pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16270
16596
 
16271
16597
  pm_node_destroy(parser, node);
@@ -16274,7 +16600,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16274
16600
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16275
16601
  parser_lex(parser);
16276
16602
 
16277
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16603
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16278
16604
  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16279
16605
 
16280
16606
  pm_node_destroy(parser, node);
@@ -16284,7 +16610,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16284
16610
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16285
16611
  parser_lex(parser);
16286
16612
 
16287
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16613
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16288
16614
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16289
16615
 
16290
16616
  pm_node_destroy(parser, node);
@@ -16302,7 +16628,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16302
16628
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16303
16629
 
16304
16630
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16305
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16631
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16306
16632
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16307
16633
 
16308
16634
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16313,7 +16639,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16313
16639
  // this is an aref expression, and we can transform it into
16314
16640
  // an aset expression.
16315
16641
  if (pm_call_node_index_p(cast)) {
16316
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16642
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16317
16643
  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
16318
16644
  }
16319
16645
 
@@ -16325,7 +16651,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16325
16651
  }
16326
16652
 
16327
16653
  parse_call_operator_write(parser, cast, &token);
16328
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16654
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16329
16655
  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
16330
16656
  }
16331
16657
  case PM_MULTI_WRITE_NODE: {
@@ -16347,14 +16673,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16347
16673
  case PM_TOKEN_KEYWORD_AND: {
16348
16674
  parser_lex(parser);
16349
16675
 
16350
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16676
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16351
16677
  return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
16352
16678
  }
16353
16679
  case PM_TOKEN_KEYWORD_OR:
16354
16680
  case PM_TOKEN_PIPE_PIPE: {
16355
16681
  parser_lex(parser);
16356
16682
 
16357
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16683
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16358
16684
  return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
16359
16685
  }
16360
16686
  case PM_TOKEN_EQUAL_TILDE: {
@@ -16366,7 +16692,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16366
16692
  //
16367
16693
  // In this case, `foo` should be a method call and not a local yet.
16368
16694
  parser_lex(parser);
16369
- pm_node_t *argument = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16695
+ pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16370
16696
 
16371
16697
  // By default, we're going to create a call node and then return it.
16372
16698
  pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument);
@@ -16451,7 +16777,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16451
16777
  case PM_TOKEN_STAR_STAR: {
16452
16778
  parser_lex(parser);
16453
16779
 
16454
- pm_node_t *argument = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16780
+ pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16455
16781
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument);
16456
16782
  }
16457
16783
  case PM_TOKEN_AMPERSAND_DOT:
@@ -16462,7 +16788,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16462
16788
 
16463
16789
  // This if statement handles the foo.() syntax.
16464
16790
  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
16465
- parse_arguments_list(parser, &arguments, true);
16791
+ parse_arguments_list(parser, &arguments, true, false);
16466
16792
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
16467
16793
  }
16468
16794
 
@@ -16484,7 +16810,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16484
16810
  }
16485
16811
  }
16486
16812
 
16487
- parse_arguments_list(parser, &arguments, true);
16813
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16488
16814
  pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
16489
16815
 
16490
16816
  if (
@@ -16504,7 +16830,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16504
16830
 
16505
16831
  pm_node_t *right = NULL;
16506
16832
  if (token_begins_expression_p(parser->current.type)) {
16507
- right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16833
+ right = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16508
16834
  }
16509
16835
 
16510
16836
  return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
@@ -16513,14 +16839,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16513
16839
  pm_token_t keyword = parser->current;
16514
16840
  parser_lex(parser);
16515
16841
 
16516
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
16842
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
16517
16843
  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
16518
16844
  }
16519
16845
  case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
16520
16846
  pm_token_t keyword = parser->current;
16521
16847
  parser_lex(parser);
16522
16848
 
16523
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16849
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16524
16850
  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
16525
16851
  }
16526
16852
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
@@ -16528,7 +16854,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16528
16854
  pm_statements_node_t *statements = pm_statements_node_create(parser);
16529
16855
  pm_statements_node_body_append(statements, node);
16530
16856
 
16531
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16857
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16532
16858
  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16533
16859
  }
16534
16860
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
@@ -16536,13 +16862,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16536
16862
  pm_statements_node_t *statements = pm_statements_node_create(parser);
16537
16863
  pm_statements_node_body_append(statements, node);
16538
16864
 
16539
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16865
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16540
16866
  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16541
16867
  }
16542
16868
  case PM_TOKEN_QUESTION_MARK: {
16543
16869
  pm_token_t qmark = parser->current;
16544
16870
  parser_lex(parser);
16545
- pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_TRUE);
16871
+ pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_TRUE);
16546
16872
 
16547
16873
  if (parser->recovering) {
16548
16874
  // If parsing the true expression of this ternary resulted in a syntax
@@ -16561,7 +16887,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16561
16887
  expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
16562
16888
 
16563
16889
  pm_token_t colon = parser->previous;
16564
- pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_FALSE);
16890
+ pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_FALSE);
16565
16891
 
16566
16892
  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16567
16893
  }
@@ -16587,7 +16913,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16587
16913
  pm_token_t message = parser->previous;
16588
16914
  pm_arguments_t arguments = { 0 };
16589
16915
 
16590
- parse_arguments_list(parser, &arguments, true);
16916
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16591
16917
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16592
16918
  } else {
16593
16919
  // Otherwise, this is a constant path. That would look like Foo::Bar.
@@ -16612,7 +16938,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16612
16938
  // If we have an identifier following a '::' operator, then it is for
16613
16939
  // sure a method call.
16614
16940
  pm_arguments_t arguments = { 0 };
16615
- parse_arguments_list(parser, &arguments, true);
16941
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16616
16942
  pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16617
16943
 
16618
16944
  // If this is followed by a comma then it is a multiple assignment.
@@ -16626,7 +16952,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16626
16952
  // If we have a parenthesis following a '::' operator, then it is the
16627
16953
  // method call shorthand. That would look like Foo::(bar).
16628
16954
  pm_arguments_t arguments = { 0 };
16629
- parse_arguments_list(parser, &arguments, true);
16955
+ parse_arguments_list(parser, &arguments, true, false);
16630
16956
 
16631
16957
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
16632
16958
  }
@@ -16640,7 +16966,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16640
16966
  case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
16641
16967
  parser_lex(parser);
16642
16968
  accept1(parser, PM_TOKEN_NEWLINE);
16643
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16969
+ pm_node_t *value = parse_expression(parser, binding_power, true, PM_ERR_RESCUE_MODIFIER_VALUE);
16644
16970
 
16645
16971
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
16646
16972
  }
@@ -16736,16 +17062,39 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16736
17062
  * determine if they need to perform additional cleanup.
16737
17063
  */
16738
17064
  static pm_node_t *
16739
- parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
17065
+ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16740
17066
  pm_token_t recovery = parser->previous;
16741
- pm_node_t *node = parse_expression_prefix(parser, binding_power);
17067
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
16742
17068
 
16743
- // If we found a syntax error, then the type of node returned by
16744
- // parse_expression_prefix is going to be a missing node. In that case we need
16745
- // to add the error message to the parser's error list.
16746
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
16747
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
16748
- return node;
17069
+ switch (PM_NODE_TYPE(node)) {
17070
+ case PM_MISSING_NODE:
17071
+ // If we found a syntax error, then the type of node returned by
17072
+ // parse_expression_prefix is going to be a missing node. In that
17073
+ // case we need to add the error message to the parser's error list.
17074
+ pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17075
+ return node;
17076
+ case PM_PRE_EXECUTION_NODE:
17077
+ case PM_POST_EXECUTION_NODE:
17078
+ case PM_ALIAS_GLOBAL_VARIABLE_NODE:
17079
+ case PM_ALIAS_METHOD_NODE:
17080
+ case PM_UNDEF_NODE:
17081
+ // These expressions are statements, and cannot be followed by
17082
+ // operators (except modifiers).
17083
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17084
+ return node;
17085
+ }
17086
+ break;
17087
+ case PM_RANGE_NODE:
17088
+ // Range operators are non-associative, so that it does not
17089
+ // associate with other range operators (i.e. `..1..` should be
17090
+ // rejected.) For this reason, we check such a case for unary ranges
17091
+ // here, and if so, it returns the node immediately,
17092
+ if ((((pm_range_node_t *) node)->left == NULL) && pm_binding_powers[parser->current.type].left >= PM_BINDING_POWER_RANGE) {
17093
+ return node;
17094
+ }
17095
+ break;
17096
+ default:
17097
+ break;
16749
17098
  }
16750
17099
 
16751
17100
  // Otherwise we'll look and see if the next token can be parsed as an infix
@@ -16756,12 +17105,68 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
16756
17105
  binding_power <= current_binding_powers.left &&
16757
17106
  current_binding_powers.binary
16758
17107
  ) {
16759
- node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right);
16760
- if (
16761
- current_binding_powers.nonassoc &&
16762
- current_binding_powers.right <= pm_binding_powers[parser->current.type].left
16763
- ) {
16764
- break;
17108
+ node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call);
17109
+ if (current_binding_powers.nonassoc) {
17110
+ bool endless_range_p = PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL;
17111
+ pm_binding_power_t left = endless_range_p ? PM_BINDING_POWER_TERM : current_binding_powers.left;
17112
+ if (
17113
+ left <= pm_binding_powers[parser->current.type].left ||
17114
+ // Exceptionally to operator precedences, '1.. & 2' is rejected.
17115
+ // '1.. || 2' is also an exception, but it is handled by the lexer.
17116
+ // (Here, parser->current is PM_TOKEN_PIPE, not PM_TOKEN_PIPE_PIPE).
17117
+ (endless_range_p && match1(parser, PM_TOKEN_AMPERSAND))
17118
+ ) {
17119
+ break;
17120
+ }
17121
+ }
17122
+ if (accepts_command_call) {
17123
+ // A command-style method call is only accepted on method chains.
17124
+ // Thus, we check whether the parsed node can continue method chains.
17125
+ // The method chain can continue if the parsed node is one of the following five kinds:
17126
+ // (1) index access: foo[1]
17127
+ // (2) attribute access: foo.bar
17128
+ // (3) method call with parenthesis: foo.bar(1)
17129
+ // (4) method call with a block: foo.bar do end
17130
+ // (5) constant path: foo::Bar
17131
+ switch (node->type) {
17132
+ case PM_CALL_NODE: {
17133
+ pm_call_node_t *cast = (pm_call_node_t *)node;
17134
+ if (
17135
+ // (1) foo[1]
17136
+ !(
17137
+ cast->call_operator_loc.start == NULL &&
17138
+ cast->message_loc.start != NULL &&
17139
+ cast->message_loc.start[0] == '[' &&
17140
+ cast->message_loc.end[-1] == ']'
17141
+ ) &&
17142
+ // (2) foo.bar
17143
+ !(
17144
+ cast->call_operator_loc.start != NULL &&
17145
+ cast->arguments == NULL &&
17146
+ cast->block == NULL &&
17147
+ cast->opening_loc.start == NULL
17148
+ ) &&
17149
+ // (3) foo.bar(1)
17150
+ !(
17151
+ cast->call_operator_loc.start != NULL &&
17152
+ cast->opening_loc.start != NULL
17153
+ ) &&
17154
+ // (4) foo.bar do end
17155
+ !(
17156
+ cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
17157
+ )
17158
+ ) {
17159
+ accepts_command_call = false;
17160
+ }
17161
+ break;
17162
+ }
17163
+ // (5) foo::Bar
17164
+ case PM_CONSTANT_PATH_NODE:
17165
+ break;
17166
+ default:
17167
+ accepts_command_call = false;
17168
+ break;
17169
+ }
16765
17170
  }
16766
17171
  }
16767
17172
 
@@ -16825,9 +17230,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16825
17230
  .error_list = { 0 },
16826
17231
  .current_scope = NULL,
16827
17232
  .current_context = NULL,
16828
- .encoding = pm_encoding_utf_8,
17233
+ .encoding = PM_ENCODING_UTF_8_ENTRY,
16829
17234
  .encoding_changed_callback = NULL,
16830
- .encoding_decode_callback = NULL,
16831
17235
  .encoding_comment_start = source,
16832
17236
  .lex_callback = NULL,
16833
17237
  .filepath_string = { 0 },
@@ -16836,11 +17240,13 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16836
17240
  .integer_base = 0,
16837
17241
  .current_string = PM_STRING_EMPTY,
16838
17242
  .start_line = 1,
17243
+ .explicit_encoding = NULL,
16839
17244
  .command_start = true,
16840
17245
  .recovering = false,
16841
17246
  .encoding_changed = false,
16842
17247
  .pattern_matching_newlines = false,
16843
17248
  .in_keyword_arg = false,
17249
+ .current_param_name = 0,
16844
17250
  .semantic_token_seen = false,
16845
17251
  .frozen_string_literal = false,
16846
17252
  .suppress_warnings = false
@@ -16875,9 +17281,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16875
17281
  parser->filepath_string = options->filepath;
16876
17282
 
16877
17283
  // line option
16878
- if (options->line > 0) {
16879
- parser->start_line = options->line;
16880
- }
17284
+ parser->start_line = options->line;
16881
17285
 
16882
17286
  // encoding option
16883
17287
  size_t encoding_length = pm_string_length(&options->encoding);
@@ -16943,18 +17347,6 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch
16943
17347
  parser->encoding_changed_callback = callback;
16944
17348
  }
16945
17349
 
16946
- /**
16947
- * Register a callback that will be called when prism encounters a magic comment
16948
- * with an encoding referenced that it doesn't understand. The callback should
16949
- * return NULL if it also doesn't understand the encoding or it should return a
16950
- * pointer to a pm_encoding_t struct that contains the functions necessary to
16951
- * parse identifiers.
16952
- */
16953
- PRISM_EXPORTED_FUNCTION void
16954
- pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback) {
16955
- parser->encoding_decode_callback = callback;
16956
- }
16957
-
16958
17350
  /**
16959
17351
  * Free all of the memory associated with the comment list.
16960
17352
  */
@@ -17046,7 +17438,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
17046
17438
  PRISM_EXPORTED_FUNCTION void
17047
17439
  pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17048
17440
  pm_options_t options = { 0 };
17049
- if (data != NULL) pm_options_read(&options, data);
17441
+ pm_options_read(&options, data);
17050
17442
 
17051
17443
  pm_parser_t parser;
17052
17444
  pm_parser_init(&parser, source, size, &options);
@@ -17068,15 +17460,15 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
17068
17460
  PRISM_EXPORTED_FUNCTION void
17069
17461
  pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17070
17462
  pm_options_t options = { 0 };
17071
- if (data != NULL) pm_options_read(&options, data);
17463
+ pm_options_read(&options, data);
17072
17464
 
17073
17465
  pm_parser_t parser;
17074
17466
  pm_parser_init(&parser, source, size, &options);
17075
17467
 
17076
17468
  pm_node_t *node = pm_parse(&parser);
17077
17469
  pm_serialize_header(buffer);
17078
- pm_serialize_encoding(&parser.encoding, buffer);
17079
- pm_buffer_append_varint(buffer, parser.start_line);
17470
+ pm_serialize_encoding(parser.encoding, buffer);
17471
+ pm_buffer_append_varsint(buffer, parser.start_line);
17080
17472
  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
17081
17473
 
17082
17474
  pm_node_destroy(&parser, node);