prism 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -1
  3. data/README.md +2 -1
  4. data/config.yml +188 -55
  5. data/docs/building.md +9 -2
  6. data/docs/configuration.md +10 -9
  7. data/docs/encoding.md +24 -56
  8. data/docs/local_variable_depth.md +229 -0
  9. data/docs/ruby_api.md +2 -0
  10. data/docs/serialization.md +18 -13
  11. data/ext/prism/api_node.c +337 -195
  12. data/ext/prism/extconf.rb +13 -7
  13. data/ext/prism/extension.c +96 -32
  14. data/ext/prism/extension.h +1 -1
  15. data/include/prism/ast.h +340 -137
  16. data/include/prism/defines.h +17 -0
  17. data/include/prism/diagnostic.h +11 -5
  18. data/include/prism/encoding.h +248 -0
  19. data/include/prism/options.h +2 -2
  20. data/include/prism/parser.h +62 -42
  21. data/include/prism/regexp.h +2 -2
  22. data/include/prism/util/pm_buffer.h +9 -1
  23. data/include/prism/util/pm_memchr.h +2 -2
  24. data/include/prism/util/pm_strpbrk.h +3 -3
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +13 -15
  27. data/lib/prism/compiler.rb +12 -0
  28. data/lib/prism/debug.rb +9 -4
  29. data/lib/prism/desugar_compiler.rb +3 -3
  30. data/lib/prism/dispatcher.rb +56 -0
  31. data/lib/prism/dot_visitor.rb +476 -198
  32. data/lib/prism/dsl.rb +66 -46
  33. data/lib/prism/ffi.rb +16 -3
  34. data/lib/prism/lex_compat.rb +19 -9
  35. data/lib/prism/mutation_compiler.rb +20 -0
  36. data/lib/prism/node.rb +1173 -450
  37. data/lib/prism/node_ext.rb +41 -16
  38. data/lib/prism/parse_result.rb +12 -15
  39. data/lib/prism/ripper_compat.rb +49 -34
  40. data/lib/prism/serialize.rb +242 -212
  41. data/lib/prism/visitor.rb +12 -0
  42. data/lib/prism.rb +20 -4
  43. data/prism.gemspec +4 -10
  44. data/rbi/prism.rbi +605 -230
  45. data/rbi/prism_static.rbi +3 -0
  46. data/sig/prism.rbs +379 -124
  47. data/sig/prism_static.rbs +1 -0
  48. data/src/diagnostic.c +228 -222
  49. data/src/encoding.c +5137 -0
  50. data/src/node.c +66 -0
  51. data/src/options.c +21 -2
  52. data/src/prettyprint.c +806 -406
  53. data/src/prism.c +1092 -700
  54. data/src/regexp.c +3 -3
  55. data/src/serialize.c +227 -157
  56. data/src/util/pm_buffer.c +10 -1
  57. data/src/util/pm_memchr.c +1 -1
  58. data/src/util/pm_strpbrk.c +4 -4
  59. metadata +5 -11
  60. data/include/prism/enc/pm_encoding.h +0 -227
  61. data/src/enc/pm_big5.c +0 -116
  62. data/src/enc/pm_cp51932.c +0 -57
  63. data/src/enc/pm_euc_jp.c +0 -69
  64. data/src/enc/pm_gbk.c +0 -65
  65. data/src/enc/pm_shift_jis.c +0 -57
  66. data/src/enc/pm_tables.c +0 -2073
  67. data/src/enc/pm_unicode.c +0 -2369
  68. data/src/enc/pm_windows_31j.c +0 -57
data/src/prism.c CHANGED
@@ -40,6 +40,7 @@ debug_context(pm_context_t context) {
40
40
  case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
41
41
  case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
42
42
  case PM_CONTEXT_ENSURE: return "ENSURE";
43
+ case PM_CONTEXT_ENSURE_DEF: return "ENSURE_DEF";
43
44
  case PM_CONTEXT_ELSE: return "ELSE";
44
45
  case PM_CONTEXT_ELSIF: return "ELSIF";
45
46
  case PM_CONTEXT_EMBEXPR: return "EMBEXPR";
@@ -56,6 +57,8 @@ debug_context(pm_context_t context) {
56
57
  case PM_CONTEXT_PREEXE: return "PREEXE";
57
58
  case PM_CONTEXT_RESCUE: return "RESCUE";
58
59
  case PM_CONTEXT_RESCUE_ELSE: return "RESCUE_ELSE";
60
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "RESCUE_ELSE_DEF";
61
+ case PM_CONTEXT_RESCUE_DEF: return "RESCUE_DEF";
59
62
  case PM_CONTEXT_SCLASS: return "SCLASS";
60
63
  case PM_CONTEXT_UNLESS: return "UNLESS";
61
64
  case PM_CONTEXT_UNTIL: return "UNTIL";
@@ -272,6 +275,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
272
275
  breakpoints[index++] = incrementor;
273
276
  }
274
277
 
278
+ parser->explicit_encoding = NULL;
275
279
  return lex_mode_push(parser, lex_mode);
276
280
  }
277
281
 
@@ -353,6 +357,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
353
357
  breakpoints[index++] = incrementor;
354
358
  }
355
359
 
360
+ parser->explicit_encoding = NULL;
356
361
  return lex_mode_push(parser, lex_mode);
357
362
  }
358
363
 
@@ -536,7 +541,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
536
541
  * Append an error to the list of errors on the parser using the location of the
537
542
  * given token and a format string.
538
543
  */
539
- #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, token->start, token->end, diag_id, __VA_ARGS__)
544
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
540
545
 
541
546
  /**
542
547
  * Append a warning to the list of warnings on the parser.
@@ -776,8 +781,7 @@ pm_conditional_predicate(pm_node_t *node) {
776
781
  * parentheses. In these cases we set the token to the "not provided" type. For
777
782
  * example:
778
783
  *
779
- * pm_token_t token;
780
- * not_provided(&token, parser->previous.end);
784
+ * pm_token_t token = not_provided(parser);
781
785
  */
782
786
  static inline pm_token_t
783
787
  not_provided(pm_parser_t *parser) {
@@ -860,6 +864,27 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
860
864
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
861
865
  }
862
866
 
867
+ /******************************************************************************/
868
+ /* Node flag handling functions */
869
+ /******************************************************************************/
870
+
871
+ /**
872
+ * Set the given flag on the given node.
873
+ */
874
+ static inline void
875
+ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
876
+ node->flags |= flag;
877
+ }
878
+
879
+ /**
880
+ * Remove the given flag from the given node.
881
+ */
882
+ static inline void
883
+ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
+ node->flags &= (pm_node_flags_t) ~flag;
885
+ }
886
+
887
+
863
888
  /******************************************************************************/
864
889
  /* Node creation functions */
865
890
  /******************************************************************************/
@@ -1148,8 +1173,12 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
1148
1173
 
1149
1174
  // If the element is not a static literal, then the array is not a static
1150
1175
  // literal. Turn that flag off.
1151
- if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
1152
- node->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
1176
+ if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
1177
+ pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
1178
+ }
1179
+
1180
+ if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
1181
+ pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
1153
1182
  }
1154
1183
  }
1155
1184
 
@@ -1193,7 +1222,7 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
1193
1222
  for (size_t index = 0; index < nodes->size; index++) {
1194
1223
  pm_node_t *child = nodes->nodes[index];
1195
1224
 
1196
- if (!found_rest && PM_NODE_TYPE_P(child, PM_SPLAT_NODE)) {
1225
+ if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
1197
1226
  node->rest = child;
1198
1227
  found_rest = true;
1199
1228
  } else if (found_rest) {
@@ -1461,7 +1490,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1461
1490
  * Allocate and initialize a new BlockNode node.
1462
1491
  */
1463
1492
  static pm_block_node_t *
1464
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_block_parameters_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1493
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1465
1494
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1466
1495
 
1467
1496
  *node = (pm_block_node_t) {
@@ -1470,6 +1499,7 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
1470
1499
  .location = { .start = opening->start, .end = closing->end },
1471
1500
  },
1472
1501
  .locals = *locals,
1502
+ .locals_body_index = locals_body_index,
1473
1503
  .parameters = parameters,
1474
1504
  .body = body,
1475
1505
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1711,7 +1741,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1711
1741
  node->block = arguments->block;
1712
1742
 
1713
1743
  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1714
- node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
1744
+ pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1715
1745
  }
1716
1746
 
1717
1747
  node->name = pm_parser_constant_id_token(parser, message);
@@ -1785,7 +1815,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
1785
1815
  node->block = arguments->block;
1786
1816
 
1787
1817
  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1788
- node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
1818
+ pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1789
1819
  }
1790
1820
 
1791
1821
  node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -1832,12 +1862,12 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1832
1862
  */
1833
1863
  static inline bool
1834
1864
  pm_call_node_variable_call_p(pm_call_node_t *node) {
1835
- return node->base.flags & PM_CALL_NODE_FLAGS_VARIABLE_CALL;
1865
+ return PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
1836
1866
  }
1837
1867
 
1838
1868
  /**
1839
- * Returns whether or not this call is to the [] method in the index form (as
1840
- * opposed to `foo.[]`).
1869
+ * Returns whether or not this call is to the [] method in the index form without a block (as
1870
+ * opposed to `foo.[]` and `foo[] { }`).
1841
1871
  */
1842
1872
  static inline bool
1843
1873
  pm_call_node_index_p(pm_call_node_t *node) {
@@ -1845,7 +1875,8 @@ pm_call_node_index_p(pm_call_node_t *node) {
1845
1875
  (node->call_operator_loc.start == NULL) &&
1846
1876
  (node->message_loc.start != NULL) &&
1847
1877
  (node->message_loc.start[0] == '[') &&
1848
- (node->message_loc.end[-1] == ']')
1878
+ (node->message_loc.end[-1] == ']') &&
1879
+ (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE))
1849
1880
  );
1850
1881
  }
1851
1882
 
@@ -2101,6 +2132,63 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2101
2132
  return node;
2102
2133
  }
2103
2134
 
2135
+ /**
2136
+ * Allocate and initialize a new CallTargetNode node from an existing call
2137
+ * node.
2138
+ */
2139
+ static pm_call_target_node_t *
2140
+ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2141
+ pm_call_target_node_t *node = PM_ALLOC_NODE(parser, pm_call_target_node_t);
2142
+
2143
+ *node = (pm_call_target_node_t) {
2144
+ {
2145
+ .type = PM_CALL_TARGET_NODE,
2146
+ .flags = target->base.flags,
2147
+ .location = target->base.location
2148
+ },
2149
+ .receiver = target->receiver,
2150
+ .call_operator_loc = target->call_operator_loc,
2151
+ .name = target->name,
2152
+ .message_loc = target->message_loc
2153
+ };
2154
+
2155
+ // Here we're going to free the target, since it is no longer necessary.
2156
+ // However, we don't want to call `pm_node_destroy` because we want to keep
2157
+ // around all of its children since we just reused them.
2158
+ free(target);
2159
+
2160
+ return node;
2161
+ }
2162
+
2163
+ /**
2164
+ * Allocate and initialize a new IndexTargetNode node from an existing call
2165
+ * node.
2166
+ */
2167
+ static pm_index_target_node_t *
2168
+ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
+ pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2170
+
2171
+ *node = (pm_index_target_node_t) {
2172
+ {
2173
+ .type = PM_INDEX_TARGET_NODE,
2174
+ .flags = target->base.flags,
2175
+ .location = target->base.location
2176
+ },
2177
+ .receiver = target->receiver,
2178
+ .opening_loc = target->opening_loc,
2179
+ .arguments = target->arguments,
2180
+ .closing_loc = target->closing_loc,
2181
+ .block = target->block
2182
+ };
2183
+
2184
+ // Here we're going to free the target, since it is no longer necessary.
2185
+ // However, we don't want to call `pm_node_destroy` because we want to keep
2186
+ // around all of its children since we just reused them.
2187
+ free(target);
2188
+
2189
+ return node;
2190
+ }
2191
+
2104
2192
  /**
2105
2193
  * Allocate and initialize a new CapturePatternNode node.
2106
2194
  */
@@ -2452,6 +2540,8 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
2452
2540
  */
2453
2541
  static pm_constant_path_node_t *
2454
2542
  pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
2543
+ pm_assert_value_expression(parser, parent);
2544
+
2455
2545
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
2456
2546
 
2457
2547
  *node = (pm_constant_path_node_t) {
@@ -2622,6 +2712,7 @@ pm_def_node_create(
2622
2712
  pm_parameters_node_t *parameters,
2623
2713
  pm_node_t *body,
2624
2714
  pm_constant_id_list_t *locals,
2715
+ uint32_t locals_body_index,
2625
2716
  const pm_token_t *def_keyword,
2626
2717
  const pm_token_t *operator,
2627
2718
  const pm_token_t *lparen,
@@ -2649,6 +2740,7 @@ pm_def_node_create(
2649
2740
  .parameters = parameters,
2650
2741
  .body = body,
2651
2742
  .locals = *locals,
2743
+ .locals_body_index = locals_body_index,
2652
2744
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2653
2745
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2654
2746
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3256,10 +3348,16 @@ static inline void
3256
3348
  pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
3257
3349
  pm_node_list_append(&hash->elements, element);
3258
3350
 
3259
- // If the element is not a static literal, then the hash is not a static
3260
- // literal. Turn that flag off.
3261
- if ((element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
3262
- hash->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
3351
+ bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
3352
+ if (static_literal) {
3353
+ pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
3354
+ static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
3355
+ static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
3356
+ static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
3357
+ }
3358
+
3359
+ if (!static_literal) {
3360
+ pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
3263
3361
  }
3264
3362
  }
3265
3363
 
@@ -3416,6 +3514,25 @@ pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
3416
3514
  return node;
3417
3515
  }
3418
3516
 
3517
+ /**
3518
+ * Allocate and initialize a new ImplicitRestNode node.
3519
+ */
3520
+ static pm_implicit_rest_node_t *
3521
+ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
3522
+ assert(token->type == PM_TOKEN_COMMA);
3523
+
3524
+ pm_implicit_rest_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_rest_node_t);
3525
+
3526
+ *node = (pm_implicit_rest_node_t) {
3527
+ {
3528
+ .type = PM_IMPLICIT_REST_NODE,
3529
+ .location = PM_LOCATION_TOKEN_VALUE(token)
3530
+ }
3531
+ };
3532
+
3533
+ return node;
3534
+ }
3535
+
3419
3536
  /**
3420
3537
  * Allocate and initialize a new IntegerNode node.
3421
3538
  */
@@ -3697,7 +3814,7 @@ static inline void
3697
3814
  pm_interpolated_regular_expression_node_closing_set(pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
3698
3815
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
3699
3816
  node->base.location.end = closing->end;
3700
- node->base.flags |= pm_regular_expression_flags_create(closing);
3817
+ pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(closing));
3701
3818
  }
3702
3819
 
3703
3820
  /**
@@ -3831,7 +3948,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3831
3948
  *node = (pm_keyword_hash_node_t) {
3832
3949
  .base = {
3833
3950
  .type = PM_KEYWORD_HASH_NODE,
3834
- .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3951
+ .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3952
+ .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS
3835
3953
  },
3836
3954
  .elements = { 0 }
3837
3955
  };
@@ -3844,6 +3962,13 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3844
3962
  */
3845
3963
  static void
3846
3964
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
+ // If the element being added is not an AssocNode or does not have a symbol key, then
3966
+ // we want to turn the STATIC_KEYS flag off.
3967
+ // TODO: Rename the flag to SYMBOL_KEYS instead.
3968
+ if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
+ pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
+ }
3971
+
3847
3972
  pm_node_list_append(&hash->elements, element);
3848
3973
  if (hash->base.location.start == NULL) {
3849
3974
  hash->base.location.start = element->location.start;
@@ -3926,10 +4051,11 @@ static pm_lambda_node_t *
3926
4051
  pm_lambda_node_create(
3927
4052
  pm_parser_t *parser,
3928
4053
  pm_constant_id_list_t *locals,
4054
+ uint32_t locals_body_index,
3929
4055
  const pm_token_t *operator,
3930
4056
  const pm_token_t *opening,
3931
4057
  const pm_token_t *closing,
3932
- pm_block_parameters_node_t *parameters,
4058
+ pm_node_t *parameters,
3933
4059
  pm_node_t *body
3934
4060
  ) {
3935
4061
  pm_lambda_node_t *node = PM_ALLOC_NODE(parser, pm_lambda_node_t);
@@ -3943,6 +4069,7 @@ pm_lambda_node_create(
3943
4069
  },
3944
4070
  },
3945
4071
  .locals = *locals,
4072
+ .locals_body_index = locals_body_index,
3946
4073
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3947
4074
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3948
4075
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4038,6 +4165,12 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4038
4165
  */
4039
4166
  static pm_local_variable_read_node_t *
4040
4167
  pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
+
4170
+ if (parser->current_param_name == name_id) {
4171
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
+ }
4173
+
4041
4174
  pm_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_read_node_t);
4042
4175
 
4043
4176
  *node = (pm_local_variable_read_node_t) {
@@ -4045,7 +4178,7 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4045
4178
  .type = PM_LOCAL_VARIABLE_READ_NODE,
4046
4179
  .location = PM_LOCATION_TOKEN_VALUE(name)
4047
4180
  },
4048
- .name = pm_parser_constant_id_token(parser, name),
4181
+ .name = name_id,
4049
4182
  .depth = depth
4050
4183
  };
4051
4184
 
@@ -4132,6 +4265,21 @@ pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name
4132
4265
  );
4133
4266
  }
4134
4267
 
4268
+ /**
4269
+ * Allocate and initialize a new LocalVariableTargetNode node with the given depth.
4270
+ */
4271
+ static pm_local_variable_target_node_t *
4272
+ pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4273
+ pm_refute_numbered_parameter(parser, name->start, name->end);
4274
+
4275
+ return pm_local_variable_target_node_create_values(
4276
+ parser,
4277
+ &(pm_location_t) { .start = name->start, .end = name->end },
4278
+ pm_parser_constant_id_token(parser, name),
4279
+ depth
4280
+ );
4281
+ }
4282
+
4135
4283
  /**
4136
4284
  * Allocate and initialize a new MatchPredicateNode node.
4137
4285
  */
@@ -4254,7 +4402,7 @@ pm_multi_target_node_create(pm_parser_t *parser) {
4254
4402
  */
4255
4403
  static void
4256
4404
  pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4257
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
4405
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4258
4406
  if (node->rest == NULL) {
4259
4407
  node->rest = target;
4260
4408
  } else {
@@ -4390,7 +4538,25 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper
4390
4538
  }
4391
4539
 
4392
4540
  /**
4393
- * Allocate a new NthReferenceReadNode node.
4541
+ * Allocate and initialize a new NumberedParametersNode node.
4542
+ */
4543
+ static pm_numbered_parameters_node_t *
4544
+ pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
4545
+ pm_numbered_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_numbered_parameters_node_t);
4546
+
4547
+ *node = (pm_numbered_parameters_node_t) {
4548
+ {
4549
+ .type = PM_NUMBERED_PARAMETERS_NODE,
4550
+ .location = *location
4551
+ },
4552
+ .maximum = maximum
4553
+ };
4554
+
4555
+ return node;
4556
+ }
4557
+
4558
+ /**
4559
+ * Allocate and initialize a new NthReferenceReadNode node.
4394
4560
  */
4395
4561
  static pm_numbered_reference_read_node_t *
4396
4562
  pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
@@ -4530,9 +4696,8 @@ pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param)
4530
4696
  * Set the rest parameter on a ParametersNode node.
4531
4697
  */
4532
4698
  static void
4533
- pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_rest_parameter_node_t *param) {
4534
- assert(params->rest == NULL);
4535
- pm_parameters_node_location_set(params, (pm_node_t *) param);
4699
+ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
4700
+ pm_parameters_node_location_set(params, param);
4536
4701
  params->rest = param;
4537
4702
  }
4538
4703
 
@@ -5124,7 +5289,7 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
5124
5289
  pm_node_list_append(&node->body, statement);
5125
5290
 
5126
5291
  // Every statement gets marked as a place where a newline can occur.
5127
- statement->flags |= PM_NODE_FLAG_NEWLINE;
5292
+ pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
5128
5293
  }
5129
5294
 
5130
5295
  /**
@@ -5643,6 +5808,7 @@ pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5643
5808
  *node = (pm_x_string_node_t) {
5644
5809
  {
5645
5810
  .type = PM_X_STRING_NODE,
5811
+ .flags = PM_STRING_FLAGS_FROZEN,
5646
5812
  .location = {
5647
5813
  .start = opening->start,
5648
5814
  .end = closing->end
@@ -5718,8 +5884,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5718
5884
  .previous = parser->current_scope,
5719
5885
  .closed = closed,
5720
5886
  .explicit_params = false,
5721
- .numbered_params = false,
5722
- .transparent = false
5887
+ .numbered_parameters = 0,
5723
5888
  };
5724
5889
 
5725
5890
  pm_constant_id_list_init(&scope->locals);
@@ -5728,27 +5893,6 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5728
5893
  return true;
5729
5894
  }
5730
5895
 
5731
- /**
5732
- * Allocate and initialize a new scope. Push it onto the scope stack.
5733
- */
5734
- static bool
5735
- pm_parser_scope_push_transparent(pm_parser_t *parser) {
5736
- pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
5737
- if (scope == NULL) return false;
5738
-
5739
- *scope = (pm_scope_t) {
5740
- .previous = parser->current_scope,
5741
- .closed = false,
5742
- .explicit_params = false,
5743
- .numbered_params = false,
5744
- .transparent = true
5745
- };
5746
-
5747
- parser->current_scope = scope;
5748
-
5749
- return true;
5750
- }
5751
-
5752
5896
  /**
5753
5897
  * Check if any of the currently visible scopes contain a local variable
5754
5898
  * described by the given constant id.
@@ -5759,7 +5903,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
5759
5903
  int depth = 0;
5760
5904
 
5761
5905
  while (scope != NULL) {
5762
- if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5906
+ if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5763
5907
  if (scope->closed) break;
5764
5908
 
5765
5909
  scope = scope->previous;
@@ -5784,15 +5928,19 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5784
5928
  */
5785
5929
  static inline void
5786
5930
  pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
5787
- pm_scope_t *scope = parser->current_scope;
5788
- while (scope && scope->transparent) scope = scope->previous;
5789
-
5790
- assert(scope != NULL);
5791
- if (!pm_constant_id_list_includes(&scope->locals, constant_id)) {
5792
- pm_constant_id_list_append(&scope->locals, constant_id);
5931
+ if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5932
+ pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
5793
5933
  }
5794
5934
  }
5795
5935
 
5936
+ /**
5937
+ * Set the numbered_parameters value of the current scope.
5938
+ */
5939
+ static inline void
5940
+ pm_parser_numbered_parameters_set(pm_parser_t *parser, uint8_t numbered_parameters) {
5941
+ parser->current_scope->numbered_parameters = numbered_parameters;
5942
+ }
5943
+
5796
5944
  /**
5797
5945
  * Add a local variable from a location to the current scope.
5798
5946
  */
@@ -5869,12 +6017,12 @@ static inline size_t
5869
6017
  char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5870
6018
  if (parser->encoding_changed) {
5871
6019
  size_t width;
5872
- if ((width = parser->encoding.alpha_char(b, parser->end - b)) != 0) {
6020
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
5873
6021
  return width;
5874
6022
  } else if (*b == '_') {
5875
6023
  return 1;
5876
6024
  } else if (*b >= 0x80) {
5877
- return parser->encoding.char_width(b, parser->end - b);
6025
+ return parser->encoding->char_width(b, parser->end - b);
5878
6026
  } else {
5879
6027
  return 0;
5880
6028
  }
@@ -5885,6 +6033,19 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5885
6033
  }
5886
6034
  }
5887
6035
 
6036
+ /**
6037
+ * Similar to char_is_identifier but this function assumes that the encoding
6038
+ * has not been changed.
6039
+ */
6040
+ static inline size_t
6041
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
+ if (*b < 0x80) {
6043
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
+ } else {
6045
+ return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6046
+ }
6047
+ }
6048
+
5888
6049
  /**
5889
6050
  * Like the above, this function is also used extremely frequently to lex all of
5890
6051
  * the identifiers in a source file once the first character has been found. So
@@ -5894,20 +6055,17 @@ static inline size_t
5894
6055
  char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
5895
6056
  if (parser->encoding_changed) {
5896
6057
  size_t width;
5897
- if ((width = parser->encoding.alnum_char(b, parser->end - b)) != 0) {
6058
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
5898
6059
  return width;
5899
6060
  } else if (*b == '_') {
5900
6061
  return 1;
5901
6062
  } else if (*b >= 0x80) {
5902
- return parser->encoding.char_width(b, parser->end - b);
6063
+ return parser->encoding->char_width(b, parser->end - b);
5903
6064
  } else {
5904
6065
  return 0;
5905
6066
  }
5906
- } else if (*b < 0x80) {
5907
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
5908
- } else {
5909
- return (size_t) (pm_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
5910
6067
  }
6068
+ return char_is_identifier_utf8(b, parser->end);
5911
6069
  }
5912
6070
 
5913
6071
  // Here we're defining a perfect hash for the characters that are allowed in
@@ -6082,195 +6240,18 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
6082
6240
  */
6083
6241
  static bool
6084
6242
  parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6085
- size_t width = (size_t) (end - start);
6086
-
6087
- // First, we're going to call out to a user-defined callback if one was
6088
- // provided. If they return an encoding struct that we can use, then we'll
6089
- // use that here.
6090
- if (parser->encoding_decode_callback != NULL) {
6091
- pm_encoding_t *encoding = parser->encoding_decode_callback(parser, start, width);
6243
+ const pm_encoding_t *encoding = pm_encoding_find(start, end);
6092
6244
 
6093
- if (encoding != NULL) {
6094
- parser->encoding = *encoding;
6095
- return true;
6096
- }
6097
- }
6098
-
6099
- // Next, we're going to check for UTF-8. This is the most common encoding.
6100
- // utf-8 can contain extra information at the end about the platform it is
6101
- // encoded on, such as utf-8-mac or utf-8-unix. We'll ignore those suffixes.
6102
- if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
6103
- // We need to explicitly handle utf-8-hfs, as that one needs to switch
6104
- // over to being utf8-mac.
6105
- if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-hfs", 4) == 0)) {
6106
- parser->encoding = pm_encoding_utf8_mac;
6245
+ if (encoding != NULL) {
6246
+ if (encoding != PM_ENCODING_UTF_8_ENTRY) {
6247
+ parser->encoding = encoding;
6107
6248
  parser->encoding_changed = true;
6108
6249
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
6109
- return true;
6110
6250
  }
6111
6251
 
6112
- // We don't need to do anything here because the default encoding is
6113
- // already UTF-8. We'll just return.
6114
6252
  return true;
6115
6253
  }
6116
6254
 
6117
- // Next, we're going to loop through each of the encodings that we handle
6118
- // explicitly. If we found one that we understand, we'll use that value.
6119
- #define ENCODING1(value, prebuilt) \
6120
- if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
6121
- parser->encoding = prebuilt; \
6122
- parser->encoding_changed = true; \
6123
- if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
6124
- return true; \
6125
- }
6126
-
6127
- // A convenience macros for comparing two aliases for the same encoding.
6128
- #define ENCODING2(value1, value2, prebuilt) ENCODING1(value1, prebuilt) ENCODING1(value2, prebuilt)
6129
-
6130
- if (width >= 3) {
6131
- switch (*start) {
6132
- case 'A': case 'a':
6133
- ENCODING1("ASCII", pm_encoding_ascii);
6134
- ENCODING1("ASCII-8BIT", pm_encoding_ascii_8bit);
6135
- ENCODING1("ANSI_X3.4-1968", pm_encoding_ascii);
6136
- break;
6137
- case 'B': case 'b':
6138
- ENCODING1("BINARY", pm_encoding_ascii_8bit);
6139
- ENCODING1("Big5", pm_encoding_big5);
6140
- ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
6141
- ENCODING1("Big5-UAO", pm_encoding_big5_uao);
6142
- break;
6143
- case 'C': case 'c':
6144
- ENCODING1("CP437", pm_encoding_ibm437);
6145
- ENCODING1("CP720", pm_encoding_ibm720);
6146
- ENCODING1("CP737", pm_encoding_ibm737);
6147
- ENCODING1("CP775", pm_encoding_ibm775);
6148
- ENCODING1("CP850", pm_encoding_cp850);
6149
- ENCODING1("CP852", pm_encoding_cp852);
6150
- ENCODING1("CP855", pm_encoding_cp855);
6151
- ENCODING1("CP857", pm_encoding_ibm857);
6152
- ENCODING1("CP860", pm_encoding_ibm860);
6153
- ENCODING1("CP861", pm_encoding_ibm861);
6154
- ENCODING1("CP862", pm_encoding_ibm862);
6155
- ENCODING1("CP864", pm_encoding_ibm864);
6156
- ENCODING1("CP865", pm_encoding_ibm865);
6157
- ENCODING1("CP866", pm_encoding_ibm866);
6158
- ENCODING1("CP869", pm_encoding_ibm869);
6159
- ENCODING1("CP874", pm_encoding_windows_874);
6160
- ENCODING1("CP878", pm_encoding_koi8_r);
6161
- ENCODING1("CP863", pm_encoding_ibm863);
6162
- ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
6163
- ENCODING1("CP936", pm_encoding_gbk);
6164
- ENCODING1("CP1250", pm_encoding_windows_1250);
6165
- ENCODING1("CP1251", pm_encoding_windows_1251);
6166
- ENCODING1("CP1252", pm_encoding_windows_1252);
6167
- ENCODING1("CP1253", pm_encoding_windows_1253);
6168
- ENCODING1("CP1254", pm_encoding_windows_1254);
6169
- ENCODING1("CP1255", pm_encoding_windows_1255);
6170
- ENCODING1("CP1256", pm_encoding_windows_1256);
6171
- ENCODING1("CP1257", pm_encoding_windows_1257);
6172
- ENCODING1("CP1258", pm_encoding_windows_1258);
6173
- ENCODING1("CP51932", pm_encoding_cp51932);
6174
- ENCODING1("CP65001", pm_encoding_utf_8);
6175
- break;
6176
- case 'E': case 'e':
6177
- ENCODING2("EUC-JP", "eucJP", pm_encoding_euc_jp);
6178
- ENCODING1("external", pm_encoding_utf_8);
6179
- break;
6180
- case 'F': case 'f':
6181
- ENCODING1("filesystem", pm_encoding_utf_8);
6182
- break;
6183
- case 'G': case 'g':
6184
- ENCODING1("GB1988", pm_encoding_gb1988);
6185
- ENCODING1("GBK", pm_encoding_gbk);
6186
- break;
6187
- case 'I': case 'i':
6188
- ENCODING1("IBM437", pm_encoding_ibm437);
6189
- ENCODING1("IBM720", pm_encoding_ibm720);
6190
- ENCODING1("IBM737", pm_encoding_ibm737);
6191
- ENCODING1("IBM775", pm_encoding_ibm775);
6192
- ENCODING1("IBM850", pm_encoding_cp850);
6193
- ENCODING1("IBM852", pm_encoding_ibm852);
6194
- ENCODING1("IBM855", pm_encoding_ibm855);
6195
- ENCODING1("IBM857", pm_encoding_ibm857);
6196
- ENCODING1("IBM860", pm_encoding_ibm860);
6197
- ENCODING1("IBM861", pm_encoding_ibm861);
6198
- ENCODING1("IBM862", pm_encoding_ibm862);
6199
- ENCODING1("IBM863", pm_encoding_ibm863);
6200
- ENCODING1("IBM864", pm_encoding_ibm864);
6201
- ENCODING1("IBM865", pm_encoding_ibm865);
6202
- ENCODING1("IBM866", pm_encoding_ibm866);
6203
- ENCODING1("IBM869", pm_encoding_ibm869);
6204
- ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
6205
- ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
6206
- ENCODING2("ISO-8859-3", "ISO8859-3", pm_encoding_iso_8859_3);
6207
- ENCODING2("ISO-8859-4", "ISO8859-4", pm_encoding_iso_8859_4);
6208
- ENCODING2("ISO-8859-5", "ISO8859-5", pm_encoding_iso_8859_5);
6209
- ENCODING2("ISO-8859-6", "ISO8859-6", pm_encoding_iso_8859_6);
6210
- ENCODING2("ISO-8859-7", "ISO8859-7", pm_encoding_iso_8859_7);
6211
- ENCODING2("ISO-8859-8", "ISO8859-8", pm_encoding_iso_8859_8);
6212
- ENCODING2("ISO-8859-9", "ISO8859-9", pm_encoding_iso_8859_9);
6213
- ENCODING2("ISO-8859-10", "ISO8859-10", pm_encoding_iso_8859_10);
6214
- ENCODING2("ISO-8859-11", "ISO8859-11", pm_encoding_iso_8859_11);
6215
- ENCODING2("ISO-8859-13", "ISO8859-13", pm_encoding_iso_8859_13);
6216
- ENCODING2("ISO-8859-14", "ISO8859-14", pm_encoding_iso_8859_14);
6217
- ENCODING2("ISO-8859-15", "ISO8859-15", pm_encoding_iso_8859_15);
6218
- ENCODING2("ISO-8859-16", "ISO8859-16", pm_encoding_iso_8859_16);
6219
- break;
6220
- case 'K': case 'k':
6221
- ENCODING1("KOI8-R", pm_encoding_koi8_r);
6222
- break;
6223
- case 'L': case 'l':
6224
- ENCODING1("locale", pm_encoding_utf_8);
6225
- break;
6226
- case 'M': case 'm':
6227
- ENCODING1("macCentEuro", pm_encoding_mac_cent_euro);
6228
- ENCODING1("macCroatian", pm_encoding_mac_croatian);
6229
- ENCODING1("macCyrillic", pm_encoding_mac_cyrillic);
6230
- ENCODING1("macGreek", pm_encoding_mac_greek);
6231
- ENCODING1("macIceland", pm_encoding_mac_iceland);
6232
- ENCODING1("macRoman", pm_encoding_mac_roman);
6233
- ENCODING1("macRomania", pm_encoding_mac_romania);
6234
- ENCODING1("macThai", pm_encoding_mac_thai);
6235
- ENCODING1("macTurkish", pm_encoding_mac_turkish);
6236
- ENCODING1("macUkraine", pm_encoding_mac_ukraine);
6237
- break;
6238
- case 'P': case 'p':
6239
- ENCODING1("PCK", pm_encoding_windows_31j);
6240
- break;
6241
- case 'S': case 's':
6242
- ENCODING1("Shift_JIS", pm_encoding_shift_jis);
6243
- ENCODING1("SJIS", pm_encoding_windows_31j);
6244
- break;
6245
- case 'T': case 't':
6246
- ENCODING1("TIS-620", pm_encoding_tis_620);
6247
- break;
6248
- case 'U': case 'u':
6249
- ENCODING1("US-ASCII", pm_encoding_ascii);
6250
- ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
6251
- break;
6252
- case 'W': case 'w':
6253
- ENCODING1("Windows-31J", pm_encoding_windows_31j);
6254
- ENCODING1("Windows-874", pm_encoding_windows_874);
6255
- ENCODING1("Windows-1250", pm_encoding_windows_1250);
6256
- ENCODING1("Windows-1251", pm_encoding_windows_1251);
6257
- ENCODING1("Windows-1252", pm_encoding_windows_1252);
6258
- ENCODING1("Windows-1253", pm_encoding_windows_1253);
6259
- ENCODING1("Windows-1254", pm_encoding_windows_1254);
6260
- ENCODING1("Windows-1255", pm_encoding_windows_1255);
6261
- ENCODING1("Windows-1256", pm_encoding_windows_1256);
6262
- ENCODING1("Windows-1257", pm_encoding_windows_1257);
6263
- ENCODING1("Windows-1258", pm_encoding_windows_1258);
6264
- break;
6265
- case '6':
6266
- ENCODING1("646", pm_encoding_ascii);
6267
- break;
6268
- }
6269
- }
6270
-
6271
- #undef ENCODING2
6272
- #undef ENCODING1
6273
-
6274
6255
  return false;
6275
6256
  }
6276
6257
 
@@ -6319,7 +6300,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6319
6300
  }
6320
6301
 
6321
6302
  const uint8_t *value_start = cursor;
6322
- while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end);
6303
+ while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
6323
6304
 
6324
6305
  if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
6325
6306
  // If we were unable to parse the encoding value, then we've got an
@@ -6353,7 +6334,7 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
6353
6334
  */
6354
6335
  static inline const uint8_t *
6355
6336
  parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
6356
- while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
6337
+ while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
6357
6338
  if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
6358
6339
  return cursor;
6359
6340
  }
@@ -6443,7 +6424,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6443
6424
  // underscores. We only need to do this if there _is_ a dash in the key.
6444
6425
  pm_string_t key;
6445
6426
  const size_t key_length = (size_t) (key_end - key_start);
6446
- const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, &parser->encoding);
6427
+ const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, parser->encoding);
6447
6428
 
6448
6429
  if (dash == NULL) {
6449
6430
  pm_string_shared_init(&key, key_start, key_end);
@@ -6455,7 +6436,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6455
6436
  memcpy(buffer, key_start, width);
6456
6437
  buffer[dash - key_start] = '_';
6457
6438
 
6458
- while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, &parser->encoding)) != NULL) {
6439
+ while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
6459
6440
  buffer[dash - key_start] = '_';
6460
6441
  }
6461
6442
 
@@ -6530,6 +6511,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6530
6511
  case PM_CONTEXT_ELSE:
6531
6512
  case PM_CONTEXT_FOR:
6532
6513
  case PM_CONTEXT_ENSURE:
6514
+ case PM_CONTEXT_ENSURE_DEF:
6533
6515
  return token->type == PM_TOKEN_KEYWORD_END;
6534
6516
  case PM_CONTEXT_FOR_INDEX:
6535
6517
  return token->type == PM_TOKEN_KEYWORD_IN;
@@ -6550,8 +6532,10 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6550
6532
  return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
6551
6533
  case PM_CONTEXT_BEGIN:
6552
6534
  case PM_CONTEXT_RESCUE:
6535
+ case PM_CONTEXT_RESCUE_DEF:
6553
6536
  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
6554
6537
  case PM_CONTEXT_RESCUE_ELSE:
6538
+ case PM_CONTEXT_RESCUE_ELSE_DEF:
6555
6539
  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
6556
6540
  case PM_CONTEXT_LAMBDA_BRACES:
6557
6541
  return token->type == PM_TOKEN_BRACE_RIGHT;
@@ -6617,6 +6601,10 @@ context_def_p(pm_parser_t *parser) {
6617
6601
  while (context_node != NULL) {
6618
6602
  switch (context_node->context) {
6619
6603
  case PM_CONTEXT_DEF:
6604
+ case PM_CONTEXT_DEF_PARAMS:
6605
+ case PM_CONTEXT_ENSURE_DEF:
6606
+ case PM_CONTEXT_RESCUE_DEF:
6607
+ case PM_CONTEXT_RESCUE_ELSE_DEF:
6620
6608
  return true;
6621
6609
  case PM_CONTEXT_CLASS:
6622
6610
  case PM_CONTEXT_MODULE:
@@ -6979,9 +6967,16 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6979
6967
  const uint8_t *end = parser->end;
6980
6968
  const uint8_t *current_start = parser->current.start;
6981
6969
  const uint8_t *current_end = parser->current.end;
6970
+ bool encoding_changed = parser->encoding_changed;
6982
6971
 
6983
- while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6984
- current_end += width;
6972
+ if (encoding_changed) {
6973
+ while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6974
+ current_end += width;
6975
+ }
6976
+ } else {
6977
+ while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
6978
+ current_end += width;
6979
+ }
6985
6980
  }
6986
6981
  parser->current.end = current_end;
6987
6982
 
@@ -7099,8 +7094,8 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
7099
7094
  }
7100
7095
  }
7101
7096
 
7102
- if (parser->encoding_changed) {
7103
- return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7097
+ if (encoding_changed) {
7098
+ return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7104
7099
  }
7105
7100
  return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7106
7101
  }
@@ -7314,7 +7309,18 @@ escape_byte(uint8_t value, const uint8_t flags) {
7314
7309
  * Write a unicode codepoint to the given buffer.
7315
7310
  */
7316
7311
  static inline void
7317
- escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
7312
+ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
7313
+ // \u escape sequences in string-like structures implicitly change the
7314
+ // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
7315
+ // literal.
7316
+ if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
7317
+ if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
7318
+ PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
7319
+ }
7320
+
7321
+ parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
7322
+ }
7323
+
7318
7324
  if (value <= 0x7F) { // 0xxxxxxx
7319
7325
  pm_buffer_append_byte(buffer, (uint8_t) value);
7320
7326
  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
@@ -7337,6 +7343,23 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *st
7337
7343
  }
7338
7344
  }
7339
7345
 
7346
+ /**
7347
+ * When you're writing a byte to the unescape buffer, if the byte is non-ASCII
7348
+ * (i.e., the top bit is set) then it locks in the encoding.
7349
+ */
7350
+ static inline void
7351
+ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
7352
+ if (byte >= 0x80) {
7353
+ if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7354
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
7355
+ }
7356
+
7357
+ parser->explicit_encoding = parser->encoding;
7358
+ }
7359
+
7360
+ pm_buffer_append_byte(buffer, byte);
7361
+ }
7362
+
7340
7363
  /**
7341
7364
  * The regular expression engine doesn't support the same escape sequences as
7342
7365
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -7353,7 +7376,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *st
7353
7376
  * source so that the regular expression engine will perform its own unescaping.
7354
7377
  */
7355
7378
  static inline void
7356
- escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7379
+ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7357
7380
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7358
7381
  pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
7359
7382
 
@@ -7372,7 +7395,7 @@ escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7372
7395
  pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
7373
7396
  }
7374
7397
  } else {
7375
- pm_buffer_append_byte(buffer, byte);
7398
+ escape_write_byte_encoded(parser, buffer, byte);
7376
7399
  }
7377
7400
  }
7378
7401
 
@@ -7384,57 +7407,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7384
7407
  switch (peek(parser)) {
7385
7408
  case '\\': {
7386
7409
  parser->current.end++;
7387
- pm_buffer_append_byte(buffer, '\\');
7410
+ escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
7388
7411
  return;
7389
7412
  }
7390
7413
  case '\'': {
7391
7414
  parser->current.end++;
7392
- pm_buffer_append_byte(buffer, '\'');
7415
+ escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
7393
7416
  return;
7394
7417
  }
7395
7418
  case 'a': {
7396
7419
  parser->current.end++;
7397
- pm_buffer_append_byte(buffer, '\a');
7420
+ escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
7398
7421
  return;
7399
7422
  }
7400
7423
  case 'b': {
7401
7424
  parser->current.end++;
7402
- pm_buffer_append_byte(buffer, '\b');
7425
+ escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
7403
7426
  return;
7404
7427
  }
7405
7428
  case 'e': {
7406
7429
  parser->current.end++;
7407
- pm_buffer_append_byte(buffer, '\033');
7430
+ escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
7408
7431
  return;
7409
7432
  }
7410
7433
  case 'f': {
7411
7434
  parser->current.end++;
7412
- pm_buffer_append_byte(buffer, '\f');
7435
+ escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
7413
7436
  return;
7414
7437
  }
7415
7438
  case 'n': {
7416
7439
  parser->current.end++;
7417
- pm_buffer_append_byte(buffer, '\n');
7440
+ escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7418
7441
  return;
7419
7442
  }
7420
7443
  case 'r': {
7421
7444
  parser->current.end++;
7422
- pm_buffer_append_byte(buffer, '\r');
7445
+ escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
7423
7446
  return;
7424
7447
  }
7425
7448
  case 's': {
7426
7449
  parser->current.end++;
7427
- pm_buffer_append_byte(buffer, ' ');
7450
+ escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
7428
7451
  return;
7429
7452
  }
7430
7453
  case 't': {
7431
7454
  parser->current.end++;
7432
- pm_buffer_append_byte(buffer, '\t');
7455
+ escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
7433
7456
  return;
7434
7457
  }
7435
7458
  case 'v': {
7436
7459
  parser->current.end++;
7437
- pm_buffer_append_byte(buffer, '\v');
7460
+ escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
7438
7461
  return;
7439
7462
  }
7440
7463
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -7451,7 +7474,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7451
7474
  }
7452
7475
  }
7453
7476
 
7454
- pm_buffer_append_byte(buffer, value);
7477
+ escape_write_byte_encoded(parser, buffer, value);
7455
7478
  return;
7456
7479
  }
7457
7480
  case 'x': {
@@ -7473,7 +7496,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7473
7496
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7474
7497
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
7475
7498
  } else {
7476
- pm_buffer_append_byte(buffer, value);
7499
+ escape_write_byte_encoded(parser, buffer, value);
7477
7500
  }
7478
7501
  } else {
7479
7502
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
@@ -7497,7 +7520,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7497
7520
  if (flags & PM_ESCAPE_FLAG_REGEXP) {
7498
7521
  pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
7499
7522
  } else {
7500
- escape_write_unicode(parser, buffer, start, parser->current.end + 4, value);
7523
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
7501
7524
  }
7502
7525
 
7503
7526
  parser->current.end += 4;
@@ -7531,13 +7554,14 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7531
7554
 
7532
7555
  if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
7533
7556
  uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
7534
- escape_write_unicode(parser, buffer, unicode_start, parser->current.end, value);
7557
+ escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
7535
7558
  }
7536
7559
 
7537
7560
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
7538
7561
  }
7539
7562
 
7540
- // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
7563
+ // ?\u{nnnn} character literal should contain only one codepoint
7564
+ // and cannot be like ?\u{nnnn mmmm}.
7541
7565
  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
7542
7566
  pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
7543
7567
  }
@@ -7568,7 +7592,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7568
7592
  switch (peeked) {
7569
7593
  case '?': {
7570
7594
  parser->current.end++;
7571
- escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
7595
+ escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7572
7596
  return;
7573
7597
  }
7574
7598
  case '\\':
@@ -7586,7 +7610,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7586
7610
  }
7587
7611
 
7588
7612
  parser->current.end++;
7589
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7613
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7590
7614
  return;
7591
7615
  }
7592
7616
  }
@@ -7608,7 +7632,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7608
7632
  switch (peeked) {
7609
7633
  case '?': {
7610
7634
  parser->current.end++;
7611
- escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
7635
+ escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7612
7636
  return;
7613
7637
  }
7614
7638
  case '\\':
@@ -7626,7 +7650,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7626
7650
  }
7627
7651
 
7628
7652
  parser->current.end++;
7629
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7653
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7630
7654
  return;
7631
7655
  }
7632
7656
  }
@@ -7661,20 +7685,20 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7661
7685
  }
7662
7686
 
7663
7687
  parser->current.end++;
7664
- escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
7688
+ escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
7665
7689
  return;
7666
7690
  }
7667
7691
  case '\r': {
7668
7692
  if (peek_offset(parser, 1) == '\n') {
7669
7693
  parser->current.end += 2;
7670
- pm_buffer_append_byte(buffer, '\n');
7694
+ escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7671
7695
  return;
7672
7696
  }
7673
7697
  }
7674
7698
  /* fallthrough */
7675
7699
  default: {
7676
7700
  if (parser->current.end < parser->end) {
7677
- pm_buffer_append_byte(buffer, *parser->current.end++);
7701
+ escape_write_byte_encoded(parser, buffer, *parser->current.end++);
7678
7702
  }
7679
7703
  return;
7680
7704
  }
@@ -7737,13 +7761,12 @@ lex_question_mark(pm_parser_t *parser) {
7737
7761
 
7738
7762
  return PM_TOKEN_CHARACTER_LITERAL;
7739
7763
  } else {
7740
- size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
7764
+ size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7741
7765
 
7742
- // Ternary operators can have a ? immediately followed by an identifier which starts with
7743
- // an underscore. We check for this case
7766
+ // Ternary operators can have a ? immediately followed by an identifier
7767
+ // which starts with an underscore. We check for this case here.
7744
7768
  if (
7745
- !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
7746
- peek(parser) == '_') ||
7769
+ !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
7747
7770
  (
7748
7771
  (parser->current.end + encoding_width >= parser->end) ||
7749
7772
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -7809,8 +7832,7 @@ parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
7809
7832
 
7810
7833
  *comment = (pm_comment_t) {
7811
7834
  .type = type,
7812
- .start = parser->current.start,
7813
- .end = parser->current.end
7835
+ .location = { parser->current.start, parser->current.end }
7814
7836
  };
7815
7837
 
7816
7838
  return comment;
@@ -7861,7 +7883,7 @@ lex_embdoc(pm_parser_t *parser) {
7861
7883
  parser->current.type = PM_TOKEN_EMBDOC_END;
7862
7884
  parser_lex_callback(parser);
7863
7885
 
7864
- comment->end = parser->current.end;
7886
+ comment->location.end = parser->current.end;
7865
7887
  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7866
7888
 
7867
7889
  return PM_TOKEN_EMBDOC_END;
@@ -7884,7 +7906,7 @@ lex_embdoc(pm_parser_t *parser) {
7884
7906
 
7885
7907
  pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
7886
7908
 
7887
- comment->end = parser->current.end;
7909
+ comment->location.end = parser->current.end;
7888
7910
  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7889
7911
 
7890
7912
  return PM_TOKEN_EOF;
@@ -8592,6 +8614,7 @@ parser_lex(pm_parser_t *parser) {
8592
8614
  // TODO: handle unterminated heredoc
8593
8615
  }
8594
8616
 
8617
+ parser->explicit_encoding = NULL;
8595
8618
  lex_mode_push(parser, (pm_lex_mode_t) {
8596
8619
  .mode = PM_LEX_HEREDOC,
8597
8620
  .as.heredoc = {
@@ -8998,7 +9021,7 @@ parser_lex(pm_parser_t *parser) {
8998
9021
  (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
8999
9022
  lex_state_spcarg_p(parser, space_seen)
9000
9023
  ) {
9001
- if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
9024
+ if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
9002
9025
  if (*parser->current.end >= 0x80) {
9003
9026
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9004
9027
  }
@@ -9021,7 +9044,7 @@ parser_lex(pm_parser_t *parser) {
9021
9044
  // Delimiters for %-literals cannot be alphanumeric. We
9022
9045
  // validate that here.
9023
9046
  uint8_t delimiter = peek_offset(parser, 1);
9024
- if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
9047
+ if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
9025
9048
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9026
9049
  goto lex_next_token;
9027
9050
  }
@@ -9207,8 +9230,8 @@ parser_lex(pm_parser_t *parser) {
9207
9230
  parser->current.type = PM_TOKEN___END__;
9208
9231
  parser_lex_callback(parser);
9209
9232
 
9210
- pm_comment_t *comment = parser_comment(parser, PM_COMMENT___END__);
9211
- pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9233
+ parser->data_loc.start = parser->current.start;
9234
+ parser->data_loc.end = parser->current.end;
9212
9235
 
9213
9236
  LEX(PM_TOKEN_EOF);
9214
9237
  }
@@ -9437,7 +9460,9 @@ parser_lex(pm_parser_t *parser) {
9437
9460
 
9438
9461
  // If we were unable to find a breakpoint, then this token hits the
9439
9462
  // end of the file.
9440
- LEX(PM_TOKEN_EOF);
9463
+ parser->current.end = parser->end;
9464
+ pm_token_buffer_flush(parser, &token_buffer);
9465
+ LEX(PM_TOKEN_STRING_CONTENT);
9441
9466
  }
9442
9467
  case PM_LEX_REGEXP: {
9443
9468
  // First, we'll set to start of this token to be the current end.
@@ -9545,7 +9570,9 @@ parser_lex(pm_parser_t *parser) {
9545
9570
  case '\r':
9546
9571
  parser->current.end++;
9547
9572
  if (peek(parser) != '\n') {
9548
- pm_token_buffer_push(&token_buffer, '\\');
9573
+ if (lex_mode->as.regexp.terminator != '\r') {
9574
+ pm_token_buffer_push(&token_buffer, '\\');
9575
+ }
9549
9576
  pm_token_buffer_push(&token_buffer, '\r');
9550
9577
  break;
9551
9578
  }
@@ -9573,7 +9600,20 @@ parser_lex(pm_parser_t *parser) {
9573
9600
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
9574
9601
  break;
9575
9602
  default:
9576
- if (lex_mode->as.regexp.terminator == '/' && peeked == '/') {
9603
+ if (lex_mode->as.regexp.terminator == peeked) {
9604
+ // Some characters when they are used as the
9605
+ // terminator also receive an escape. They are
9606
+ // enumerated here.
9607
+ switch (peeked) {
9608
+ case '$': case ')': case '*': case '+':
9609
+ case '.': case '>': case '?': case ']':
9610
+ case '^': case '|': case '}':
9611
+ pm_token_buffer_push(&token_buffer, '\\');
9612
+ break;
9613
+ default:
9614
+ break;
9615
+ }
9616
+
9577
9617
  pm_token_buffer_push(&token_buffer, peeked);
9578
9618
  parser->current.end++;
9579
9619
  break;
@@ -9626,7 +9666,9 @@ parser_lex(pm_parser_t *parser) {
9626
9666
 
9627
9667
  // If we were unable to find a breakpoint, then this token hits the
9628
9668
  // end of the file.
9629
- LEX(PM_TOKEN_EOF);
9669
+ parser->current.end = parser->end;
9670
+ pm_token_buffer_flush(parser, &token_buffer);
9671
+ LEX(PM_TOKEN_STRING_CONTENT);
9630
9672
  }
9631
9673
  case PM_LEX_STRING: {
9632
9674
  // First, we'll set to start of this token to be the current end.
@@ -9830,8 +9872,10 @@ parser_lex(pm_parser_t *parser) {
9830
9872
  }
9831
9873
 
9832
9874
  // If we've hit the end of the string, then this is an unterminated
9833
- // string. In that case we'll return the EOF token.
9834
- LEX(PM_TOKEN_EOF);
9875
+ // string. In that case we'll return a string content token.
9876
+ parser->current.end = parser->end;
9877
+ pm_token_buffer_flush(parser, &token_buffer);
9878
+ LEX(PM_TOKEN_STRING_CONTENT);
9835
9879
  }
9836
9880
  case PM_LEX_HEREDOC: {
9837
9881
  // First, we'll set to start of this token.
@@ -9860,24 +9904,42 @@ parser_lex(pm_parser_t *parser) {
9860
9904
  // terminator, then we need to return the ending of the heredoc.
9861
9905
  if (current_token_starts_line(parser)) {
9862
9906
  const uint8_t *start = parser->current.start;
9863
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9907
+ if (start + ident_length <= parser->end) {
9908
+ const uint8_t *newline = next_newline(start, parser->end - start);
9909
+ const uint8_t *ident_end = newline;
9910
+ const uint8_t *terminator_end = newline;
9911
+
9912
+ if (newline == NULL) {
9913
+ terminator_end = parser->end;
9914
+ ident_end = parser->end;
9915
+ } else {
9916
+ terminator_end++;
9917
+ if (newline[-1] == '\r') {
9918
+ ident_end--; // Remove \r
9919
+ }
9920
+ }
9864
9921
 
9865
- if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
9866
- bool matched = true;
9867
- bool at_end = false;
9922
+ const uint8_t *terminator_start = ident_end - ident_length;
9923
+ const uint8_t *cursor = start;
9868
9924
 
9869
- size_t eol_length = match_eol_at(parser, start + ident_length);
9870
- if (eol_length) {
9871
- parser->current.end = start + ident_length + eol_length;
9872
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9873
- } else if (parser->end == (start + ident_length)) {
9874
- parser->current.end = start + ident_length;
9875
- at_end = true;
9876
- } else {
9877
- matched = false;
9925
+ if (
9926
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
9927
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
9928
+ ) {
9929
+ while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
9930
+ cursor++;
9931
+ }
9878
9932
  }
9879
9933
 
9880
- if (matched) {
9934
+ if (
9935
+ (cursor == terminator_start) &&
9936
+ (memcmp(terminator_start, ident_start, ident_length) == 0)
9937
+ ) {
9938
+ if (newline != NULL) {
9939
+ pm_newline_list_append(&parser->newline_list, newline);
9940
+ }
9941
+
9942
+ parser->current.end = terminator_end;
9881
9943
  if (*lex_mode->as.heredoc.next_start == '\\') {
9882
9944
  parser->next_start = NULL;
9883
9945
  } else {
@@ -9885,15 +9947,12 @@ parser_lex(pm_parser_t *parser) {
9885
9947
  parser->heredoc_end = parser->current.end;
9886
9948
  }
9887
9949
 
9888
- parser->current_string_common_whitespace = parser->lex_modes.current->as.heredoc.common_whitespace;
9889
- lex_mode_pop(parser);
9890
- if (!at_end) {
9891
- lex_state_set(parser, PM_LEX_STATE_END);
9892
- }
9950
+ lex_state_set(parser, PM_LEX_STATE_END);
9893
9951
  LEX(PM_TOKEN_HEREDOC_END);
9894
9952
  }
9895
9953
  }
9896
9954
 
9955
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9897
9956
  if (
9898
9957
  lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
9899
9958
  (lex_mode->as.heredoc.common_whitespace > whitespace) &&
@@ -9937,23 +9996,35 @@ parser_lex(pm_parser_t *parser) {
9937
9996
  // If we have a - or ~ heredoc, then we can match after
9938
9997
  // some leading whitespace.
9939
9998
  const uint8_t *start = breakpoint + 1;
9940
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9941
9999
 
9942
- // If we have hit a newline that is followed by a valid
9943
- // terminator, then we need to return the content of the
9944
- // heredoc here as string content. Then, the next time a
9945
- // token is lexed, it will match again and return the
9946
- // end of the heredoc.
9947
- if (
9948
- !was_escaped_newline &&
9949
- (start + ident_length <= parser->end) &&
9950
- (memcmp(start, ident_start, ident_length) == 0)
9951
- ) {
9952
- // Heredoc terminators must be followed by a
9953
- // newline, CRLF, or EOF to be valid.
10000
+ if (!was_escaped_newline && (start + ident_length <= parser->end)) {
10001
+ // We want to match the terminator starting from the end of the line in case
10002
+ // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
10003
+ const uint8_t *newline = next_newline(start, parser->end - start);
10004
+
10005
+ if (newline == NULL) {
10006
+ newline = parser->end;
10007
+ } else if (newline[-1] == '\r') {
10008
+ newline--; // Remove \r
10009
+ }
10010
+
10011
+ // Start of a possible terminator.
10012
+ const uint8_t *terminator_start = newline - ident_length;
10013
+
10014
+ // Cursor to check for the leading whitespace. We skip the
10015
+ // leading whitespace if we have a - or ~ heredoc.
10016
+ const uint8_t *cursor = start;
10017
+
10018
+ if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
10019
+ lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
10020
+ while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
10021
+ cursor++;
10022
+ }
10023
+ }
10024
+
9954
10025
  if (
9955
- start + ident_length == parser->end ||
9956
- match_eol_at(parser, start + ident_length)
10026
+ cursor == terminator_start &&
10027
+ (memcmp(terminator_start, ident_start, ident_length) == 0)
9957
10028
  ) {
9958
10029
  parser->current.end = breakpoint + 1;
9959
10030
  pm_token_buffer_flush(parser, &token_buffer);
@@ -9961,6 +10032,14 @@ parser_lex(pm_parser_t *parser) {
9961
10032
  }
9962
10033
  }
9963
10034
 
10035
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
10036
+
10037
+ // If we have hit a newline that is followed by a valid
10038
+ // terminator, then we need to return the content of the
10039
+ // heredoc here as string content. Then, the next time a
10040
+ // token is lexed, it will match again and return the
10041
+ // end of the heredoc.
10042
+
9964
10043
  if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
9965
10044
  if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
9966
10045
  lex_mode->as.heredoc.common_whitespace = whitespace;
@@ -10078,8 +10157,10 @@ parser_lex(pm_parser_t *parser) {
10078
10157
  }
10079
10158
 
10080
10159
  // If we've hit the end of the string, then this is an unterminated
10081
- // heredoc. In that case we'll return the EOF token.
10082
- LEX(PM_TOKEN_EOF);
10160
+ // heredoc. In that case we'll return a string content token.
10161
+ parser->current.end = parser->end;
10162
+ pm_token_buffer_flush(parser, &token_buffer);
10163
+ LEX(PM_TOKEN_STRING_CONTENT);
10083
10164
  }
10084
10165
  }
10085
10166
 
@@ -10101,32 +10182,33 @@ parser_lex(pm_parser_t *parser) {
10101
10182
  * specify their associativity by adding or subtracting one.
10102
10183
  */
10103
10184
  typedef enum {
10104
- PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10105
- PM_BINDING_POWER_STATEMENT = 2,
10106
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10107
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10108
- PM_BINDING_POWER_COMPOSITION = 8, // and or
10109
- PM_BINDING_POWER_NOT = 10, // not
10110
- PM_BINDING_POWER_MATCH = 12, // => in
10111
- PM_BINDING_POWER_DEFINED = 14, // defined?
10112
- PM_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
10113
- PM_BINDING_POWER_TERNARY = 18, // ?:
10114
- PM_BINDING_POWER_RANGE = 20, // .. ...
10115
- PM_BINDING_POWER_LOGICAL_OR = 22, // ||
10116
- PM_BINDING_POWER_LOGICAL_AND = 24, // &&
10117
- PM_BINDING_POWER_EQUALITY = 26, // <=> == === != =~ !~
10118
- PM_BINDING_POWER_COMPARISON = 28, // > >= < <=
10119
- PM_BINDING_POWER_BITWISE_OR = 30, // | ^
10120
- PM_BINDING_POWER_BITWISE_AND = 32, // &
10121
- PM_BINDING_POWER_SHIFT = 34, // << >>
10122
- PM_BINDING_POWER_TERM = 36, // + -
10123
- PM_BINDING_POWER_FACTOR = 38, // * / %
10124
- PM_BINDING_POWER_UMINUS = 40, // -@
10125
- PM_BINDING_POWER_EXPONENT = 42, // **
10126
- PM_BINDING_POWER_UNARY = 44, // ! ~ +@
10127
- PM_BINDING_POWER_INDEX = 46, // [] []=
10128
- PM_BINDING_POWER_CALL = 48, // :: .
10129
- PM_BINDING_POWER_MAX = 50
10185
+ PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
+ PM_BINDING_POWER_STATEMENT = 2,
10187
+ PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
+ PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10189
+ PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
+ PM_BINDING_POWER_NOT = 10, // not
10191
+ PM_BINDING_POWER_MATCH = 12, // => in
10192
+ PM_BINDING_POWER_DEFINED = 14, // defined?
10193
+ PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
10194
+ PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
10195
+ PM_BINDING_POWER_TERNARY = 20, // ?:
10196
+ PM_BINDING_POWER_RANGE = 22, // .. ...
10197
+ PM_BINDING_POWER_LOGICAL_OR = 24, // ||
10198
+ PM_BINDING_POWER_LOGICAL_AND = 26, // &&
10199
+ PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
10200
+ PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
10201
+ PM_BINDING_POWER_BITWISE_OR = 32, // | ^
10202
+ PM_BINDING_POWER_BITWISE_AND = 34, // &
10203
+ PM_BINDING_POWER_SHIFT = 36, // << >>
10204
+ PM_BINDING_POWER_TERM = 38, // + -
10205
+ PM_BINDING_POWER_FACTOR = 40, // * / %
10206
+ PM_BINDING_POWER_UMINUS = 42, // -@
10207
+ PM_BINDING_POWER_EXPONENT = 44, // **
10208
+ PM_BINDING_POWER_UNARY = 46, // ! ~ +@
10209
+ PM_BINDING_POWER_INDEX = 48, // [] []=
10210
+ PM_BINDING_POWER_CALL = 50, // :: .
10211
+ PM_BINDING_POWER_MAX = 52
10130
10212
  } pm_binding_power_t;
10131
10213
 
10132
10214
  /**
@@ -10153,7 +10235,7 @@ typedef struct {
10153
10235
  #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
10154
10236
  #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
10155
10237
  #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
10156
- #define NON_ASSOCIATIVE(precedence) { precedence + 1, precedence + 1, true, true }
10238
+ #define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
10157
10239
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10158
10240
 
10159
10241
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
@@ -10196,6 +10278,8 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10196
10278
  // .. ...
10197
10279
  [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10198
10280
  [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10281
+ [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10282
+ [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10199
10283
 
10200
10284
  // ||
10201
10285
  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
@@ -10204,12 +10288,12 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10204
10288
  [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
10205
10289
 
10206
10290
  // != !~ == === =~ <=>
10207
- [PM_TOKEN_BANG_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10208
- [PM_TOKEN_BANG_TILDE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10209
- [PM_TOKEN_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10210
- [PM_TOKEN_EQUAL_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10211
- [PM_TOKEN_EQUAL_TILDE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10212
- [PM_TOKEN_LESS_EQUAL_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10291
+ [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10292
+ [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10293
+ [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10294
+ [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10295
+ [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10296
+ [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10213
10297
 
10214
10298
  // > >= < <=
10215
10299
  [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
@@ -10289,6 +10373,14 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10289
10373
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
10290
10374
  }
10291
10375
 
10376
+ /**
10377
+ * Returns true if the current token is any of the four given types.
10378
+ */
10379
+ static inline bool
10380
+ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10381
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10382
+ }
10383
+
10292
10384
  /**
10293
10385
  * Returns true if the current token is any of the five given types.
10294
10386
  */
@@ -10414,14 +10506,14 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
10414
10506
  }
10415
10507
 
10416
10508
  static pm_node_t *
10417
- parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id);
10509
+ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
10418
10510
 
10419
10511
  /**
10420
10512
  * This is a wrapper of parse_expression, which also checks whether the resulting node is value expression.
10421
10513
  */
10422
10514
  static pm_node_t *
10423
- parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10424
- pm_node_t *node = parse_expression(parser, binding_power, diag_id);
10515
+ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10516
+ pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, diag_id);
10425
10517
  pm_assert_value_expression(parser, node);
10426
10518
  return node;
10427
10519
  }
@@ -10506,14 +10598,14 @@ token_begins_expression_p(pm_token_type_t type) {
10506
10598
  * prefixed by the * operator.
10507
10599
  */
10508
10600
  static pm_node_t *
10509
- parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10601
+ parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10510
10602
  if (accept1(parser, PM_TOKEN_USTAR)) {
10511
10603
  pm_token_t operator = parser->previous;
10512
- pm_node_t *expression = parse_value_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10604
+ pm_node_t *expression = parse_value_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10513
10605
  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
10514
10606
  }
10515
10607
 
10516
- return parse_value_expression(parser, binding_power, diag_id);
10608
+ return parse_value_expression(parser, binding_power, accepts_command_call, diag_id);
10517
10609
  }
10518
10610
 
10519
10611
  /**
@@ -10621,7 +10713,6 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10621
10713
  pm_node_destroy(parser, target);
10622
10714
 
10623
10715
  uint32_t depth = 0;
10624
- for (pm_scope_t *scope = parser->current_scope; scope && scope->transparent; depth++, scope = scope->previous);
10625
10716
  const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
10626
10717
  target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
10627
10718
 
@@ -10632,25 +10723,17 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10632
10723
  return target;
10633
10724
  }
10634
10725
 
10635
- if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10726
+ if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10636
10727
  parse_write_name(parser, &call->name);
10637
- return (pm_node_t *) call;
10728
+ return (pm_node_t *) pm_call_target_node_create(parser, call);
10638
10729
  }
10639
10730
  }
10640
10731
 
10641
10732
  // If there is no call operator and the message is "[]" then this is
10642
10733
  // an aref expression, and we can transform it into an aset
10643
10734
  // expression.
10644
- if (
10645
- (call->call_operator_loc.start == NULL) &&
10646
- (call->message_loc.start != NULL) &&
10647
- (call->message_loc.start[0] == '[') &&
10648
- (call->message_loc.end[-1] == ']') &&
10649
- (call->block == NULL)
10650
- ) {
10651
- // Replace the name with "[]=".
10652
- call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
10653
- return target;
10735
+ if (pm_call_node_index_p(call)) {
10736
+ return (pm_node_t *) pm_index_target_node_create(parser, call);
10654
10737
  }
10655
10738
  }
10656
10739
  /* fallthrough */
@@ -10690,6 +10773,7 @@ static pm_node_t *
10690
10773
  parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
10691
10774
  switch (PM_NODE_TYPE(target)) {
10692
10775
  case PM_MISSING_NODE:
10776
+ pm_node_destroy(parser, value);
10693
10777
  return target;
10694
10778
  case PM_CLASS_VARIABLE_READ_NODE: {
10695
10779
  pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
@@ -10700,6 +10784,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10700
10784
  return (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
10701
10785
  case PM_CONSTANT_READ_NODE: {
10702
10786
  pm_constant_write_node_t *node = pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
10787
+ if (context_def_p(parser)) {
10788
+ pm_parser_err_node(parser, (pm_node_t *) node, PM_ERR_WRITE_TARGET_IN_METHOD);
10789
+ }
10703
10790
  pm_node_destroy(parser, target);
10704
10791
  return (pm_node_t *) node;
10705
10792
  }
@@ -10779,7 +10866,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10779
10866
  return target;
10780
10867
  }
10781
10868
 
10782
- if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10869
+ if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10783
10870
  // When we get here, we have a method call, because it was
10784
10871
  // previously marked as a method call but now we have an =. This
10785
10872
  // looks like:
@@ -10797,6 +10884,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10797
10884
  call->base.location.end = arguments->base.location.end;
10798
10885
 
10799
10886
  parse_write_name(parser, &call->name);
10887
+ pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10800
10888
  return (pm_node_t *) call;
10801
10889
  }
10802
10890
  }
@@ -10804,13 +10892,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10804
10892
  // If there is no call operator and the message is "[]" then this is
10805
10893
  // an aref expression, and we can transform it into an aset
10806
10894
  // expression.
10807
- if (
10808
- (call->call_operator_loc.start == NULL) &&
10809
- (call->message_loc.start != NULL) &&
10810
- (call->message_loc.start[0] == '[') &&
10811
- (call->message_loc.end[-1] == ']') &&
10812
- (call->block == NULL)
10813
- ) {
10895
+ if (pm_call_node_index_p(call)) {
10814
10896
  if (call->arguments == NULL) {
10815
10897
  call->arguments = pm_arguments_node_create(parser);
10816
10898
  }
@@ -10820,6 +10902,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10820
10902
 
10821
10903
  // Replace the name with "[]=".
10822
10904
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
10905
+ pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10823
10906
  return target;
10824
10907
  }
10825
10908
 
@@ -10852,7 +10935,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10852
10935
  */
10853
10936
  static pm_node_t *
10854
10937
  parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10855
- bool has_splat = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
10938
+ bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
10856
10939
 
10857
10940
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
10858
10941
  pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
@@ -10862,7 +10945,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10862
10945
  // Here we have a splat operator. It can have a name or be
10863
10946
  // anonymous. It can be the final target or be in the middle if
10864
10947
  // there haven't been any others yet.
10865
- if (has_splat) {
10948
+ if (has_rest) {
10866
10949
  pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
10867
10950
  }
10868
10951
 
@@ -10870,24 +10953,23 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10870
10953
  pm_node_t *name = NULL;
10871
10954
 
10872
10955
  if (token_begins_expression_p(parser->current.type)) {
10873
- name = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10956
+ name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10874
10957
  name = parse_target(parser, name);
10875
10958
  }
10876
10959
 
10877
10960
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
10878
10961
  pm_multi_target_node_targets_append(parser, result, splat);
10879
- has_splat = true;
10962
+ has_rest = true;
10880
10963
  } else if (token_begins_expression_p(parser->current.type)) {
10881
- pm_node_t *target = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
10964
+ pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
10882
10965
  target = parse_target(parser, target);
10883
10966
 
10884
10967
  pm_multi_target_node_targets_append(parser, result, target);
10885
10968
  } else if (!match1(parser, PM_TOKEN_EOF)) {
10886
10969
  // If we get here, then we have a trailing , in a multi target node.
10887
- // We need to indicate this somehow in the tree, so we'll add an
10888
- // anonymous splat.
10889
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
10890
- pm_multi_target_node_targets_append(parser, result, splat);
10970
+ // We'll set the implicit rest flag to indicate this.
10971
+ pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
10972
+ pm_multi_target_node_targets_append(parser, result, rest);
10891
10973
  break;
10892
10974
  }
10893
10975
  }
@@ -10930,7 +11012,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
10930
11012
  context_push(parser, context);
10931
11013
 
10932
11014
  while (true) {
10933
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
11015
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
10934
11016
  pm_statements_node_body_append(statements, node);
10935
11017
 
10936
11018
  // If we're recovering from a syntax error, then we need to stop parsing the
@@ -10984,7 +11066,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
10984
11066
  }
10985
11067
 
10986
11068
  /**
10987
- * Parse all of the elements of a hash. eturns true if a double splat was found.
11069
+ * Parse all of the elements of a hash. returns true if a double splat was found.
10988
11070
  */
10989
11071
  static bool
10990
11072
  parse_assocs(pm_parser_t *parser, pm_node_t *node) {
@@ -11001,7 +11083,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11001
11083
  pm_node_t *value = NULL;
11002
11084
 
11003
11085
  if (token_begins_expression_p(parser->current.type)) {
11004
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11086
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11005
11087
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
11006
11088
  pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11007
11089
  }
@@ -11019,9 +11101,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11019
11101
  pm_node_t *value = NULL;
11020
11102
 
11021
11103
  if (token_begins_expression_p(parser->current.type)) {
11022
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11104
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11023
11105
  } else {
11024
- if (parser->encoding.isupper_char(label.start, (label.end - 1) - label.start)) {
11106
+ if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
11025
11107
  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
11026
11108
  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
11027
11109
  } else {
@@ -11043,7 +11125,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11043
11125
  break;
11044
11126
  }
11045
11127
  default: {
11046
- pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
11128
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
11047
11129
  pm_token_t operator;
11048
11130
 
11049
11131
  if (pm_symbol_node_label_p(key)) {
@@ -11053,7 +11135,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11053
11135
  operator = parser->previous;
11054
11136
  }
11055
11137
 
11056
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11138
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11057
11139
  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
11058
11140
  break;
11059
11141
  }
@@ -11136,15 +11218,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11136
11218
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
11137
11219
  argument = (pm_node_t *) hash;
11138
11220
 
11139
- bool contains_keyword_splat = false;
11140
- if (!match7(parser, terminator, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
11141
- contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11142
- }
11143
-
11221
+ bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11144
11222
  parsed_bare_hash = true;
11145
11223
  parse_arguments_append(parser, arguments, argument);
11146
11224
  if (contains_keyword_splat) {
11147
- arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
11225
+ pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
11148
11226
  }
11149
11227
  break;
11150
11228
  }
@@ -11154,9 +11232,15 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11154
11232
  pm_node_t *expression = NULL;
11155
11233
 
11156
11234
  if (token_begins_expression_p(parser->current.type)) {
11157
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11158
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11159
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11235
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
+ } else {
11237
+ if (pm_parser_local_depth(parser, &operator) == -1) {
11238
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
+ pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
+ if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
+ pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
+ }
11243
+ }
11160
11244
  }
11161
11245
 
11162
11246
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11173,14 +11257,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11173
11257
  parser_lex(parser);
11174
11258
  pm_token_t operator = parser->previous;
11175
11259
 
11176
- if (match3(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON)) {
11260
+ if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11177
11261
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11178
11262
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11179
11263
  }
11180
11264
 
11181
11265
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11182
11266
  } else {
11183
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11267
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11184
11268
 
11185
11269
  if (parsed_bare_hash) {
11186
11270
  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
@@ -11200,7 +11284,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11200
11284
  // If the token begins an expression then this ... was not actually
11201
11285
  // argument forwarding but was instead a range.
11202
11286
  pm_token_t operator = parser->previous;
11203
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11287
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11204
11288
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11205
11289
  } else {
11206
11290
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
@@ -11220,7 +11304,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11220
11304
  /* fallthrough */
11221
11305
  default: {
11222
11306
  if (argument == NULL) {
11223
- argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11307
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
11224
11308
  }
11225
11309
 
11226
11310
  bool contains_keyword_splat = false;
@@ -11239,7 +11323,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11239
11323
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
11240
11324
 
11241
11325
  // Finish parsing the one we are part way through
11242
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11326
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11243
11327
 
11244
11328
  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
11245
11329
  pm_keyword_hash_node_elements_append(bare_hash, argument);
@@ -11258,7 +11342,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11342
 
11259
11343
  parse_arguments_append(parser, arguments, argument);
11260
11344
  if (contains_keyword_splat) {
11261
- arguments->arguments->base.flags |= PM_ARGUMENTS_NODE_FLAGS_KEYWORD_SPLAT;
11345
+ pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
11262
11346
  }
11263
11347
  break;
11264
11348
  }
@@ -11310,11 +11394,14 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11310
11394
  do {
11311
11395
  pm_node_t *param;
11312
11396
 
11313
- // If we get here then we have a trailing comma. In this case we'll
11314
- // create an implicit splat node.
11397
+ // If we get here then we have a trailing comma, which isn't allowed in
11398
+ // the grammar. In other places, multi targets _do_ allow trailing
11399
+ // commas, so here we'll assume this is a mistake of the user not
11400
+ // knowing it's not allowed here.
11315
11401
  if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
11316
- param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
11402
+ param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11317
11403
  pm_multi_target_node_targets_append(parser, node, param);
11404
+ pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
11318
11405
  break;
11319
11406
  }
11320
11407
 
@@ -11545,10 +11632,14 @@ parse_parameters(
11545
11632
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11546
11633
  pm_token_t operator = parser->previous;
11547
11634
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11548
- pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11635
+ pm_constant_id_t old_param_name = parser->current_param_name;
11636
+ parser->current_param_name = pm_parser_constant_id_token(parser, &name);
11637
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11549
11638
 
11550
11639
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11551
11640
  pm_parameters_node_optionals_append(params, param);
11641
+
11642
+ parser->current_param_name = old_param_name;
11552
11643
  context_pop(parser);
11553
11644
 
11554
11645
  // If parsing the value of the parameter resulted in error recovery,
@@ -11604,7 +11695,10 @@ parse_parameters(
11604
11695
 
11605
11696
  if (token_begins_expression_p(parser->current.type)) {
11606
11697
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11607
- pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11698
+ pm_constant_id_t old_param_name = parser->current_param_name;
11699
+ parser->current_param_name = pm_parser_constant_id_token(parser, &local);
11700
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
+ parser->current_param_name = old_param_name;
11608
11702
  context_pop(parser);
11609
11703
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11610
11704
  }
@@ -11647,12 +11741,12 @@ parse_parameters(
11647
11741
  }
11648
11742
  }
11649
11743
 
11650
- pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &operator, &name);
11744
+ pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
11651
11745
  if (params->rest == NULL) {
11652
11746
  pm_parameters_node_rest_set(params, param);
11653
11747
  } else {
11654
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
11655
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
11748
+ pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
11749
+ pm_parameters_node_posts_append(params, param);
11656
11750
  }
11657
11751
 
11658
11752
  break;
@@ -11697,11 +11791,9 @@ parse_parameters(
11697
11791
  default:
11698
11792
  if (parser->previous.type == PM_TOKEN_COMMA) {
11699
11793
  if (allows_trailing_comma) {
11700
- // If we get here, then we have a trailing comma in a block
11701
- // parameter list. We need to create an anonymous rest parameter to
11702
- // represent it.
11703
- pm_token_t name = not_provided(parser);
11704
- pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &parser->previous, &name);
11794
+ // If we get here, then we have a trailing comma in a
11795
+ // block parameter list.
11796
+ pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11705
11797
 
11706
11798
  if (params->rest == NULL) {
11707
11799
  pm_parameters_node_rest_set(params, param);
@@ -11739,7 +11831,7 @@ parse_parameters(
11739
11831
  * nodes pointing to each other from the top.
11740
11832
  */
11741
11833
  static inline void
11742
- parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11834
+ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
11743
11835
  pm_rescue_node_t *current = NULL;
11744
11836
 
11745
11837
  while (accept1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
@@ -11753,7 +11845,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11753
11845
  parser_lex(parser);
11754
11846
  pm_rescue_node_operator_set(rescue, &parser->previous);
11755
11847
 
11756
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_RESCUE_VARIABLE);
11848
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11757
11849
  reference = parse_target(parser, reference);
11758
11850
 
11759
11851
  pm_rescue_node_reference_set(rescue, reference);
@@ -11771,7 +11863,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11771
11863
  // we'll attempt to parse it here and any others delimited by commas.
11772
11864
 
11773
11865
  do {
11774
- pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_RESCUE_EXPRESSION);
11866
+ pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION);
11775
11867
  pm_rescue_node_exceptions_append(rescue, expression);
11776
11868
 
11777
11869
  // If we hit a newline, then this is the end of the rescue expression. We
@@ -11783,7 +11875,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11783
11875
  if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
11784
11876
  pm_rescue_node_operator_set(rescue, &parser->previous);
11785
11877
 
11786
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_RESCUE_VARIABLE);
11878
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11787
11879
  reference = parse_target(parser, reference);
11788
11880
 
11789
11881
  pm_rescue_node_reference_set(rescue, reference);
@@ -11802,7 +11894,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11802
11894
 
11803
11895
  if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
11804
11896
  pm_accepts_block_stack_push(parser, true);
11805
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_RESCUE);
11897
+ pm_statements_node_t *statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_DEF : PM_CONTEXT_RESCUE);
11806
11898
  if (statements) {
11807
11899
  pm_rescue_node_statements_set(rescue, statements);
11808
11900
  }
@@ -11838,7 +11930,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11838
11930
  pm_statements_node_t *else_statements = NULL;
11839
11931
  if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
11840
11932
  pm_accepts_block_stack_push(parser, true);
11841
- else_statements = parse_statements(parser, PM_CONTEXT_RESCUE_ELSE);
11933
+ else_statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_ELSE_DEF : PM_CONTEXT_RESCUE_ELSE);
11842
11934
  pm_accepts_block_stack_pop(parser);
11843
11935
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11844
11936
  }
@@ -11854,7 +11946,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11854
11946
  pm_statements_node_t *ensure_statements = NULL;
11855
11947
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
11856
11948
  pm_accepts_block_stack_push(parser, true);
11857
- ensure_statements = parse_statements(parser, PM_CONTEXT_ENSURE);
11949
+ ensure_statements = parse_statements(parser, def_p ? PM_CONTEXT_ENSURE_DEF : PM_CONTEXT_ENSURE);
11858
11950
  pm_accepts_block_stack_pop(parser);
11859
11951
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11860
11952
  }
@@ -11872,10 +11964,10 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node) {
11872
11964
  }
11873
11965
 
11874
11966
  static inline pm_begin_node_t *
11875
- parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements) {
11967
+ parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
11876
11968
  pm_token_t no_begin_token = not_provided(parser);
11877
11969
  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
11878
- parse_rescues(parser, begin_node);
11970
+ parse_rescues(parser, begin_node, def_p);
11879
11971
 
11880
11972
  // All nodes within a begin node are optional, so we look
11881
11973
  // for the earliest possible node that we can use to set
@@ -11941,24 +12033,30 @@ parse_block(pm_parser_t *parser) {
11941
12033
 
11942
12034
  pm_accepts_block_stack_push(parser, true);
11943
12035
  pm_parser_scope_push(parser, false);
11944
- pm_block_parameters_node_t *parameters = NULL;
12036
+ pm_block_parameters_node_t *block_parameters = NULL;
11945
12037
 
11946
12038
  if (accept1(parser, PM_TOKEN_PIPE)) {
11947
12039
  parser->current_scope->explicit_params = true;
11948
12040
  pm_token_t block_parameters_opening = parser->previous;
11949
12041
 
11950
12042
  if (match1(parser, PM_TOKEN_PIPE)) {
11951
- parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12043
+ block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
11952
12044
  parser->command_start = true;
11953
12045
  parser_lex(parser);
11954
12046
  } else {
11955
- parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
12047
+ block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
11956
12048
  accept1(parser, PM_TOKEN_NEWLINE);
11957
12049
  parser->command_start = true;
11958
12050
  expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
11959
12051
  }
11960
12052
 
11961
- pm_block_parameters_node_closing_set(parameters, &parser->previous);
12053
+ pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
+ }
12055
+
12056
+ uint32_t locals_body_index = 0;
12057
+
12058
+ if (block_parameters) {
12059
+ locals_body_index = (uint32_t) parser->current_scope->locals.size;
11962
12060
  }
11963
12061
 
11964
12062
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -11980,17 +12078,25 @@ parse_block(pm_parser_t *parser) {
11980
12078
 
11981
12079
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
11982
12080
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
11983
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
12081
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
11984
12082
  }
11985
12083
  }
11986
12084
 
11987
12085
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
11988
12086
  }
11989
12087
 
12088
+ pm_node_t *parameters = (pm_node_t *) block_parameters;
12089
+ uint8_t maximum = parser->current_scope->numbered_parameters;
12090
+
12091
+ if (parameters == NULL && (maximum > 0)) {
12092
+ parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
+ locals_body_index = maximum;
12094
+ }
12095
+
11990
12096
  pm_constant_id_list_t locals = parser->current_scope->locals;
11991
12097
  pm_parser_scope_pop(parser);
11992
12098
  pm_accepts_block_stack_pop(parser);
11993
- return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12099
+ return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
11994
12100
  }
11995
12101
 
11996
12102
  /**
@@ -11999,7 +12105,7 @@ parse_block(pm_parser_t *parser) {
11999
12105
  * arguments, or blocks).
12000
12106
  */
12001
12107
  static bool
12002
- parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block) {
12108
+ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call) {
12003
12109
  bool found = false;
12004
12110
 
12005
12111
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
@@ -12016,7 +12122,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12016
12122
 
12017
12123
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
12018
12124
  }
12019
- } else if ((token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
12125
+ } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
12020
12126
  found |= true;
12021
12127
  pm_accepts_block_stack_push(parser, false);
12022
12128
 
@@ -12071,7 +12177,7 @@ static inline pm_node_t *
12071
12177
  parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
12072
12178
  context_push(parser, PM_CONTEXT_PREDICATE);
12073
12179
  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
12074
- pm_node_t *predicate = parse_value_expression(parser, binding_power, error_id);
12180
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, error_id);
12075
12181
 
12076
12182
  // Predicates are closed by a term, a "then", or a term and then a "then".
12077
12183
  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -12266,6 +12372,26 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
12266
12372
  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
12267
12373
  case PM_NUMBERED_REFERENCE_READ_NODE
12268
12374
 
12375
+ // Assert here that the flags are the same so that we can safely switch the type
12376
+ // of the node without having to move the flags.
12377
+ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
12378
+
12379
+ /**
12380
+ * If the encoding was explicitly set through the lexing process, then we need
12381
+ * to potentially mark the string's flags to indicate how to encode it.
12382
+ */
12383
+ static inline pm_node_flags_t
12384
+ parse_unescaped_encoding(const pm_parser_t *parser) {
12385
+ if (parser->explicit_encoding != NULL) {
12386
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12387
+ return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12389
+ return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
+ }
12391
+ }
12392
+ return 0;
12393
+ }
12394
+
12269
12395
  /**
12270
12396
  * Parse a node that is part of a string. If the subsequent tokens cannot be
12271
12397
  * parsed as a string part, then NULL is returned.
@@ -12282,7 +12408,9 @@ parse_string_part(pm_parser_t *parser) {
12282
12408
  case PM_TOKEN_STRING_CONTENT: {
12283
12409
  pm_token_t opening = not_provided(parser);
12284
12410
  pm_token_t closing = not_provided(parser);
12411
+
12285
12412
  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
12413
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
12286
12414
 
12287
12415
  parser_lex(parser);
12288
12416
  return node;
@@ -12451,7 +12579,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12451
12579
  }
12452
12580
 
12453
12581
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12454
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12582
+ if (match1(parser, PM_TOKEN_EOF)) {
12583
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12584
+ } else {
12585
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12586
+ }
12455
12587
 
12456
12588
  return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
12457
12589
  }
@@ -12463,6 +12595,34 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12463
12595
  content = parser->current;
12464
12596
  unescaped = parser->current_string;
12465
12597
  parser_lex(parser);
12598
+
12599
+ // If we have two string contents in a row, then the content of this
12600
+ // symbol is split because of heredoc contents. This looks like:
12601
+ //
12602
+ // <<A; :'a
12603
+ // A
12604
+ // b'
12605
+ //
12606
+ // In this case, the best way we have to represent this is as an
12607
+ // interpolated string node, so that's what we'll do here.
12608
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
12609
+ pm_node_list_t parts = { 0 };
12610
+ pm_token_t bounds = not_provided(parser);
12611
+
12612
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
12613
+ pm_node_list_append(&parts, part);
12614
+
12615
+ part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
12616
+ pm_node_list_append(&parts, part);
12617
+
12618
+ if (next_state != PM_LEX_STATE_NONE) {
12619
+ lex_state_set(parser, next_state);
12620
+ }
12621
+
12622
+ parser_lex(parser);
12623
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12624
+ return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12625
+ }
12466
12626
  } else {
12467
12627
  content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
12468
12628
  pm_string_shared_init(&unescaped, content.start, content.end);
@@ -12472,7 +12632,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12472
12632
  lex_state_set(parser, next_state);
12473
12633
  }
12474
12634
 
12475
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12635
+ if (match1(parser, PM_TOKEN_EOF)) {
12636
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
12637
+ } else {
12638
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
+ }
12476
12640
  return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
12477
12641
  }
12478
12642
 
@@ -12561,9 +12725,9 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
12561
12725
  * numbered parameters.
12562
12726
  */
12563
12727
  static bool
12564
- outer_scope_using_numbered_params_p(pm_parser_t *parser) {
12728
+ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12565
12729
  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
12566
- if (scope->numbered_params) return true;
12730
+ if (scope->numbered_parameters) return true;
12567
12731
  }
12568
12732
 
12569
12733
  return false;
@@ -12583,25 +12747,32 @@ parse_variable_call(pm_parser_t *parser) {
12583
12747
  }
12584
12748
 
12585
12749
  if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12586
- // Indicate that this scope is using numbered params so that child
12587
- // scopes cannot.
12588
- parser->current_scope->numbered_params = true;
12589
-
12590
12750
  // Now that we know we have a numbered parameter, we need to check
12591
12751
  // if it's allowed in this context. If it is, then we will create a
12592
12752
  // local variable read. If it's not, then we'll create a normal call
12593
12753
  // node but add an error.
12594
12754
  if (parser->current_scope->explicit_params) {
12595
12755
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12596
- } else if (outer_scope_using_numbered_params_p(parser)) {
12756
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
12597
12757
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12598
12758
  } else {
12759
+ // Indicate that this scope is using numbered params so that child
12760
+ // scopes cannot.
12761
+ uint8_t number = parser->previous.start[1];
12762
+
12763
+ // We subtract the value for the character '0' to get the actual
12764
+ // integer value of the number (only _1 through _9 are valid)
12765
+ uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
+ parser->current_scope->numbered_parameters = numbered_parameters;
12768
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
+ }
12770
+
12599
12771
  // When you use a numbered parameter, it implies the existence
12600
12772
  // of all of the locals that exist before it. For example,
12601
12773
  // referencing _2 means that _1 must exist. Therefore here we
12602
12774
  // loop through all of the possibilities and add them into the
12603
12775
  // constant pool.
12604
- uint8_t number = parser->previous.start[1];
12605
12776
  uint8_t current = '1';
12606
12777
  uint8_t *value;
12607
12778
 
@@ -12624,7 +12795,7 @@ parse_variable_call(pm_parser_t *parser) {
12624
12795
  }
12625
12796
 
12626
12797
  pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
12627
- node->base.flags |= flags;
12798
+ pm_node_flag_set((pm_node_t *)node, flags);
12628
12799
 
12629
12800
  return (pm_node_t *) node;
12630
12801
  }
@@ -12803,7 +12974,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12803
12974
  case PM_ARRAY_PATTERN_NODE: {
12804
12975
  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
12805
12976
 
12806
- if (pattern_node->constant == NULL) {
12977
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12807
12978
  pattern_node->base.location.start = node->location.start;
12808
12979
  pattern_node->base.location.end = closing.end;
12809
12980
 
@@ -12819,7 +12990,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12819
12990
  case PM_FIND_PATTERN_NODE: {
12820
12991
  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
12821
12992
 
12822
- if (pattern_node->constant == NULL) {
12993
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12823
12994
  pattern_node->base.location.start = node->location.start;
12824
12995
  pattern_node->base.location.end = closing.end;
12825
12996
 
@@ -12835,7 +13006,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12835
13006
  case PM_HASH_PATTERN_NODE: {
12836
13007
  pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
12837
13008
 
12838
- if (pattern_node->constant == NULL) {
13009
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12839
13010
  pattern_node->base.location.start = node->location.start;
12840
13011
  pattern_node->base.location.end = closing.end;
12841
13012
 
@@ -12951,10 +13122,15 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
12951
13122
  break;
12952
13123
  }
12953
13124
 
12954
- pm_node_t *assoc;
12955
-
12956
13125
  if (match1(parser, PM_TOKEN_USTAR_STAR)) {
12957
- assoc = parse_pattern_keyword_rest(parser);
13126
+ pm_node_t *assoc = parse_pattern_keyword_rest(parser);
13127
+
13128
+ if (rest == NULL) {
13129
+ rest = assoc;
13130
+ } else {
13131
+ pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13132
+ pm_node_list_append(&assocs, assoc);
13133
+ }
12958
13134
  } else {
12959
13135
  expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
12960
13136
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
@@ -12968,10 +13144,14 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
12968
13144
  }
12969
13145
 
12970
13146
  pm_token_t operator = not_provided(parser);
12971
- assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
12972
- }
13147
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
13148
+
13149
+ if (rest != NULL) {
13150
+ pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13151
+ }
12973
13152
 
12974
- pm_node_list_append(&assocs, assoc);
13153
+ pm_node_list_append(&assocs, assoc);
13154
+ }
12975
13155
  }
12976
13156
 
12977
13157
  pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
@@ -12989,8 +13169,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
12989
13169
  case PM_TOKEN_IDENTIFIER:
12990
13170
  case PM_TOKEN_METHOD_NAME: {
12991
13171
  parser_lex(parser);
12992
- pm_parser_local_add_token(parser, &parser->previous);
12993
- return (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous);
13172
+ pm_token_t name = parser->previous;
13173
+ int depth = pm_parser_local_depth(parser, &name);
13174
+ if (depth < 0) {
13175
+ depth = 0;
13176
+ pm_parser_local_add_token(parser, &name);
13177
+ }
13178
+ return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth);
12994
13179
  }
12995
13180
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
12996
13181
  pm_token_t opening = parser->current;
@@ -13077,7 +13262,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13077
13262
  first_assoc = parse_pattern_keyword_rest(parser);
13078
13263
  break;
13079
13264
  case PM_TOKEN_STRING_BEGIN: {
13080
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
13265
+ pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13081
13266
  pm_token_t operator = not_provided(parser);
13082
13267
 
13083
13268
  if (!pm_symbol_node_label_p(key)) {
@@ -13124,7 +13309,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13124
13309
  // expression as the right side of the range.
13125
13310
  switch (parser->current.type) {
13126
13311
  case PM_CASE_PRIMITIVE: {
13127
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13312
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13128
13313
  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13129
13314
  }
13130
13315
  default: {
@@ -13135,7 +13320,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13135
13320
  }
13136
13321
  }
13137
13322
  case PM_CASE_PRIMITIVE: {
13138
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, diag_id);
13323
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, diag_id);
13139
13324
 
13140
13325
  // Now that we have a primitive, we need to check if it's part of a range.
13141
13326
  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -13146,7 +13331,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13146
13331
  // node. Otherwise, we'll create an endless range.
13147
13332
  switch (parser->current.type) {
13148
13333
  case PM_CASE_PRIMITIVE: {
13149
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13334
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13150
13335
  return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
13151
13336
  }
13152
13337
  default:
@@ -13206,7 +13391,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13206
13391
  pm_token_t lparen = parser->current;
13207
13392
  parser_lex(parser);
13208
13393
 
13209
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13394
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13210
13395
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
13211
13396
 
13212
13397
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -13307,9 +13492,13 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13307
13492
 
13308
13493
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
13309
13494
  pm_token_t identifier = parser->previous;
13310
- pm_parser_local_add_token(parser, &identifier);
13495
+ int depth = pm_parser_local_depth(parser, &identifier);
13496
+ if (depth < 0) {
13497
+ depth = 0;
13498
+ pm_parser_local_add_token(parser, &identifier);
13499
+ }
13311
13500
 
13312
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier);
13501
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth);
13313
13502
  node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
13314
13503
  }
13315
13504
 
@@ -13370,6 +13559,8 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13370
13559
  while (accept1(parser, PM_TOKEN_COMMA)) {
13371
13560
  // Break early here in case we have a trailing comma.
13372
13561
  if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13562
+ node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13563
+ pm_node_list_append(&nodes, node);
13373
13564
  break;
13374
13565
  }
13375
13566
 
@@ -13460,13 +13651,15 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13460
13651
 
13461
13652
  // Here we have found a string literal. We'll parse it and add it to
13462
13653
  // the list of strings.
13463
- assert(parser->lex_modes.current->mode == PM_LEX_STRING);
13464
- bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
13654
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
13655
+ assert(lex_mode->mode == PM_LEX_STRING);
13656
+ bool lex_interpolation = lex_mode->as.string.interpolation;
13465
13657
 
13466
13658
  pm_token_t opening = parser->current;
13467
13659
  parser_lex(parser);
13468
13660
 
13469
- if (accept1(parser, PM_TOKEN_STRING_END)) {
13661
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13662
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13470
13663
  // If we get here, then we have an end immediately after a
13471
13664
  // start. In that case we'll create an empty content token and
13472
13665
  // return an uninterpolated string.
@@ -13489,15 +13682,16 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13489
13682
  // If we don't accept interpolation then we expect the string to
13490
13683
  // start with a single string content node.
13491
13684
  pm_string_t unescaped;
13685
+ pm_token_t content;
13492
13686
  if (match1(parser, PM_TOKEN_EOF)) {
13493
13687
  unescaped = PM_STRING_EMPTY;
13688
+ content = not_provided(parser);
13494
13689
  } else {
13495
13690
  unescaped = parser->current_string;
13691
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
13692
+ content = parser->previous;
13496
13693
  }
13497
13694
 
13498
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
13499
- pm_token_t content = parser->previous;
13500
-
13501
13695
  // It is unfortunately possible to have multiple string content
13502
13696
  // nodes in a row in the case that there's heredoc content in
13503
13697
  // the middle of the string, like this cursed example:
@@ -13526,6 +13720,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13526
13720
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13527
13721
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13528
13722
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13723
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13724
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13529
13726
  } else {
13530
13727
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13531
13728
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
@@ -13539,9 +13736,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13539
13736
  pm_string_t unescaped = parser->current_string;
13540
13737
  parser_lex(parser);
13541
13738
 
13542
- if (match1(parser, PM_TOKEN_STRING_END)) {
13739
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13543
13740
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13544
- parser_lex(parser);
13741
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13545
13743
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13546
13744
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13547
13745
  } else {
@@ -13552,6 +13750,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13552
13750
  pm_token_t string_closing = not_provided(parser);
13553
13751
 
13554
13752
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
13753
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
13555
13754
  pm_node_list_append(&parts, part);
13556
13755
 
13557
13756
  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
@@ -13562,6 +13761,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13562
13761
 
13563
13762
  if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13564
13763
  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13764
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13765
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13766
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13565
13767
  } else {
13566
13768
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13567
13769
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -13582,6 +13784,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13582
13784
 
13583
13785
  if (accept1(parser, PM_TOKEN_LABEL_END)) {
13584
13786
  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13787
+ } else if (match1(parser, PM_TOKEN_EOF)) {
13788
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13789
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13585
13790
  } else {
13586
13791
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13587
13792
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -13629,7 +13834,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13629
13834
  * Parse an expression that begins with the previous node that we just lexed.
13630
13835
  */
13631
13836
  static inline pm_node_t *
13632
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13837
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
13633
13838
  switch (parser->current.type) {
13634
13839
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13635
13840
  parser_lex(parser);
@@ -13665,7 +13870,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13665
13870
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13666
13871
  }
13667
13872
  } else {
13668
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13873
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13669
13874
  }
13670
13875
 
13671
13876
  element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
@@ -13683,7 +13888,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13683
13888
 
13684
13889
  parsed_bare_hash = true;
13685
13890
  } else {
13686
- element = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_ARRAY_EXPRESSION);
13891
+ element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
13687
13892
 
13688
13893
  if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13689
13894
  if (parsed_bare_hash) {
@@ -13699,7 +13904,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13699
13904
  operator = not_provided(parser);
13700
13905
  }
13701
13906
 
13702
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
13907
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
13703
13908
  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
13704
13909
  pm_keyword_hash_node_elements_append(hash, assoc);
13705
13910
 
@@ -13740,7 +13945,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13740
13945
  // of statements within the parentheses.
13741
13946
  pm_accepts_block_stack_push(parser, true);
13742
13947
  context_push(parser, PM_CONTEXT_PARENS);
13743
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
13948
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13744
13949
  context_pop(parser);
13745
13950
 
13746
13951
  // Determine if this statement is followed by a terminator. In the
@@ -13816,7 +14021,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13816
14021
 
13817
14022
  // Parse each statement within the parentheses.
13818
14023
  while (true) {
13819
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
14024
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13820
14025
  pm_statements_node_body_append(statements, node);
13821
14026
 
13822
14027
  // If we're recovering from a syntax error, then we need to stop
@@ -13879,6 +14084,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13879
14084
 
13880
14085
  pm_token_t closing = not_provided(parser);
13881
14086
  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
14087
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
13882
14088
 
13883
14089
  // Characters can be followed by strings in which case they are
13884
14090
  // automatically concatenated.
@@ -13906,11 +14112,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13906
14112
  // fact a method call, not a constant read.
13907
14113
  if (
13908
14114
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
13909
- (binding_power <= PM_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14115
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
13910
14116
  (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
13911
14117
  ) {
13912
14118
  pm_arguments_t arguments = { 0 };
13913
- parse_arguments_list(parser, &arguments, true);
14119
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
13914
14120
  return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
13915
14121
  }
13916
14122
 
@@ -13944,7 +14150,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13944
14150
  pm_token_t operator = parser->current;
13945
14151
  parser_lex(parser);
13946
14152
 
13947
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
14153
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
13948
14154
  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13949
14155
  }
13950
14156
  case PM_TOKEN_FLOAT:
@@ -14003,10 +14209,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14003
14209
  pm_call_node_t *call = (pm_call_node_t *) node;
14004
14210
  pm_arguments_t arguments = { 0 };
14005
14211
 
14006
- if (parse_arguments_list(parser, &arguments, true)) {
14212
+ if (parse_arguments_list(parser, &arguments, true, accepts_command_call)) {
14007
14213
  // Since we found arguments, we need to turn off the
14008
14214
  // variable call bit in the flags.
14009
- call->base.flags &= (pm_node_flags_t) ~PM_CALL_NODE_FLAGS_VARIABLE_CALL;
14215
+ pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
14010
14216
 
14011
14217
  call->opening_loc = arguments.opening_loc;
14012
14218
  call->arguments = arguments.arguments;
@@ -14030,11 +14236,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14030
14236
  // can still be a method call if it is followed by arguments or
14031
14237
  // a block, so we need to check for that here.
14032
14238
  if (
14033
- (binding_power <= PM_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14239
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14034
14240
  (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14035
14241
  ) {
14036
14242
  pm_arguments_t arguments = { 0 };
14037
- parse_arguments_list(parser, &arguments, true);
14243
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
14038
14244
 
14039
14245
  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
14040
14246
  pm_node_destroy(parser, node);
@@ -14065,7 +14271,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14065
14271
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14066
14272
  // If we get here, then we have an empty heredoc. We'll create
14067
14273
  // an empty content token and return an empty string node.
14068
- lex_state_set(parser, PM_LEX_STATE_END);
14274
+ lex_mode_pop(parser);
14069
14275
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14070
14276
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
14071
14277
 
@@ -14086,6 +14292,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14086
14292
  // content and we're at the end of the heredoc, so we can return
14087
14293
  // just a string node with the heredoc opening and closing as
14088
14294
  // its opening and closing.
14295
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
14089
14296
  pm_string_node_t *cast = (pm_string_node_t *) part;
14090
14297
 
14091
14298
  cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
@@ -14097,13 +14304,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14097
14304
  cast->base.type = PM_X_STRING_NODE;
14098
14305
  }
14099
14306
 
14100
- size_t common_whitespace = parser->current_string_common_whitespace;
14307
+ size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14101
14308
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14102
14309
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
14103
14310
  }
14104
14311
 
14105
14312
  node = (pm_node_t *) cast;
14106
- lex_state_set(parser, PM_LEX_STATE_END);
14313
+ lex_mode_pop(parser);
14107
14314
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14108
14315
  } else {
14109
14316
  // If we get here, then we have multiple parts in the heredoc,
@@ -14118,13 +14325,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14118
14325
  }
14119
14326
  }
14120
14327
 
14328
+ size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14329
+
14121
14330
  // Now that we have all of the parts, create the correct type of
14122
14331
  // interpolated node.
14123
14332
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14124
14333
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
14125
14334
  cast->parts = parts;
14126
14335
 
14127
- lex_state_set(parser, PM_LEX_STATE_END);
14336
+ lex_mode_pop(parser);
14128
14337
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14129
14338
 
14130
14339
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
@@ -14133,7 +14342,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14133
14342
  } else {
14134
14343
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
14135
14344
 
14136
- lex_state_set(parser, PM_LEX_STATE_END);
14345
+ lex_mode_pop(parser);
14137
14346
  expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14138
14347
 
14139
14348
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
@@ -14143,7 +14352,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14143
14352
 
14144
14353
  // If this is a heredoc that is indented with a ~, then we need
14145
14354
  // to dedent each line by the common leading whitespace.
14146
- size_t common_whitespace = parser->current_string_common_whitespace;
14147
14355
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14148
14356
  pm_node_list_t *nodes;
14149
14357
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -14202,6 +14410,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14202
14410
  parser_lex(parser);
14203
14411
  return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
14204
14412
  case PM_TOKEN_KEYWORD_ALIAS: {
14413
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
14414
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
14415
+ }
14416
+
14205
14417
  parser_lex(parser);
14206
14418
  pm_token_t keyword = parser->previous;
14207
14419
 
@@ -14246,7 +14458,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14246
14458
  } else if (!token_begins_expression_p(parser->current.type)) {
14247
14459
  predicate = NULL;
14248
14460
  } else {
14249
- predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14461
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14250
14462
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14251
14463
  }
14252
14464
 
@@ -14273,14 +14485,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14273
14485
  do {
14274
14486
  if (accept1(parser, PM_TOKEN_USTAR)) {
14275
14487
  pm_token_t operator = parser->previous;
14276
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14488
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14277
14489
 
14278
14490
  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
14279
14491
  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
14280
14492
 
14281
14493
  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
14282
14494
  } else {
14283
- pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14495
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14284
14496
  pm_when_node_conditions_append(when_node, condition);
14285
14497
 
14286
14498
  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
@@ -14337,11 +14549,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14337
14549
  // for guard clauses in the form of `if` or `unless` statements.
14338
14550
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14339
14551
  pm_token_t keyword = parser->previous;
14340
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
14552
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14341
14553
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14342
14554
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14343
14555
  pm_token_t keyword = parser->previous;
14344
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14556
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14345
14557
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14346
14558
  }
14347
14559
 
@@ -14426,7 +14638,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14426
14638
  }
14427
14639
 
14428
14640
  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
14429
- parse_rescues(parser, begin_node);
14641
+ parse_rescues(parser, begin_node, false);
14430
14642
 
14431
14643
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
14432
14644
  begin_node->base.location.end = parser->previous.end;
@@ -14439,6 +14651,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14439
14651
  return (pm_node_t *) begin_node;
14440
14652
  }
14441
14653
  case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
14654
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
14655
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
14656
+ }
14657
+
14442
14658
  parser_lex(parser);
14443
14659
  pm_token_t keyword = parser->previous;
14444
14660
 
@@ -14496,7 +14712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14496
14712
 
14497
14713
  pm_token_t keyword = parser->previous;
14498
14714
  pm_arguments_t arguments = { 0 };
14499
- parse_arguments_list(parser, &arguments, true);
14715
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
14500
14716
 
14501
14717
  if (
14502
14718
  arguments.opening_loc.start == NULL &&
@@ -14513,7 +14729,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14513
14729
 
14514
14730
  pm_token_t keyword = parser->previous;
14515
14731
  pm_arguments_t arguments = { 0 };
14516
- parse_arguments_list(parser, &arguments, false);
14732
+ parse_arguments_list(parser, &arguments, false, accepts_command_call);
14517
14733
 
14518
14734
  return (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
14519
14735
  }
@@ -14524,8 +14740,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14524
14740
 
14525
14741
  if (accept1(parser, PM_TOKEN_LESS_LESS)) {
14526
14742
  pm_token_t operator = parser->previous;
14527
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_NOT, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14743
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14528
14744
 
14745
+ pm_constant_id_t old_param_name = parser->current_param_name;
14746
+ parser->current_param_name = 0;
14529
14747
  pm_parser_scope_push(parser, true);
14530
14748
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14531
14749
 
@@ -14538,18 +14756,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14538
14756
 
14539
14757
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14540
14758
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14541
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
14759
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14542
14760
  }
14543
14761
 
14544
14762
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14545
14763
 
14546
14764
  pm_constant_id_list_t locals = parser->current_scope->locals;
14547
14765
  pm_parser_scope_pop(parser);
14766
+ parser->current_param_name = old_param_name;
14548
14767
  pm_do_loop_stack_pop(parser);
14549
14768
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14550
14769
  }
14551
14770
 
14552
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_CLASS_NAME);
14771
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_CLASS_NAME);
14553
14772
  pm_token_t name = parser->previous;
14554
14773
  if (name.type != PM_TOKEN_CONSTANT) {
14555
14774
  pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
@@ -14565,12 +14784,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14565
14784
  parser->command_start = true;
14566
14785
  parser_lex(parser);
14567
14786
 
14568
- superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14787
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CLASS_SUPERCLASS);
14569
14788
  } else {
14570
14789
  inheritance_operator = not_provided(parser);
14571
14790
  superclass = NULL;
14572
14791
  }
14573
14792
 
14793
+ pm_constant_id_t old_param_name = parser->current_param_name;
14794
+ parser->current_param_name = 0;
14574
14795
  pm_parser_scope_push(parser, true);
14575
14796
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14576
14797
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
@@ -14587,7 +14808,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14587
14808
 
14588
14809
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14589
14810
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14590
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
14811
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14591
14812
  }
14592
14813
 
14593
14814
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
@@ -14598,6 +14819,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14598
14819
 
14599
14820
  pm_constant_id_list_t locals = parser->current_scope->locals;
14600
14821
  pm_parser_scope_pop(parser);
14822
+ parser->current_param_name = old_param_name;
14601
14823
  pm_do_loop_stack_pop(parser);
14602
14824
 
14603
14825
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
@@ -14613,12 +14835,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14613
14835
  pm_token_t operator = not_provided(parser);
14614
14836
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14615
14837
 
14838
+ // This context is necessary for lexing `...` in a bare params correctly.
14839
+ // It must be pushed before lexing the first param, so it is here.
14616
14840
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
14617
14841
  parser_lex(parser);
14842
+ pm_constant_id_t old_param_name = parser->current_param_name;
14618
14843
 
14619
14844
  switch (parser->current.type) {
14620
14845
  case PM_CASE_OPERATOR:
14621
14846
  pm_parser_scope_push(parser, true);
14847
+ parser->current_param_name = 0;
14622
14848
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14623
14849
  parser_lex(parser);
14624
14850
  name = parser->previous;
@@ -14630,6 +14856,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14630
14856
  receiver = parse_variable_call(parser);
14631
14857
 
14632
14858
  pm_parser_scope_push(parser, true);
14859
+ parser->current_param_name = 0;
14633
14860
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14634
14861
  parser_lex(parser);
14635
14862
 
@@ -14638,6 +14865,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14638
14865
  } else {
14639
14866
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14640
14867
  pm_parser_scope_push(parser, true);
14868
+ parser->current_param_name = 0;
14641
14869
  name = parser->previous;
14642
14870
  }
14643
14871
 
@@ -14655,6 +14883,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14655
14883
  case PM_TOKEN_KEYWORD___LINE__:
14656
14884
  case PM_TOKEN_KEYWORD___ENCODING__: {
14657
14885
  pm_parser_scope_push(parser, true);
14886
+ parser->current_param_name = 0;
14658
14887
  parser_lex(parser);
14659
14888
  pm_token_t identifier = parser->previous;
14660
14889
 
@@ -14708,9 +14937,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14708
14937
  break;
14709
14938
  }
14710
14939
  case PM_TOKEN_PARENTHESIS_LEFT: {
14940
+ // The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner expression
14941
+ // of this parenthesis should not be processed under this context.
14942
+ // Thus, the context is popped here.
14943
+ context_pop(parser);
14711
14944
  parser_lex(parser);
14945
+
14712
14946
  pm_token_t lparen = parser->previous;
14713
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14947
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14714
14948
 
14715
14949
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14716
14950
  pm_token_t rparen = parser->previous;
@@ -14722,11 +14956,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14722
14956
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14723
14957
 
14724
14958
  pm_parser_scope_push(parser, true);
14959
+ parser->current_param_name = 0;
14960
+
14961
+ // To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
14962
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
14725
14963
  name = parse_method_definition_name(parser);
14726
14964
  break;
14727
14965
  }
14728
14966
  default:
14729
14967
  pm_parser_scope_push(parser, true);
14968
+ parser->current_param_name = 0;
14730
14969
  name = parse_method_definition_name(parser);
14731
14970
  break;
14732
14971
  }
@@ -14779,6 +15018,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14779
15018
  }
14780
15019
  }
14781
15020
 
15021
+ uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
+
14782
15023
  context_pop(parser);
14783
15024
  pm_node_t *statements = NULL;
14784
15025
  pm_token_t equal;
@@ -14794,11 +15035,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14794
15035
  pm_do_loop_stack_push(parser, false);
14795
15036
  statements = (pm_node_t *) pm_statements_node_create(parser);
14796
15037
 
14797
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, PM_ERR_DEF_ENDLESS);
15038
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, PM_ERR_DEF_ENDLESS);
14798
15039
 
14799
15040
  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
14800
15041
  pm_token_t rescue_keyword = parser->previous;
14801
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15042
+ pm_node_t *value = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
14802
15043
  pm_rescue_modifier_node_t *rescue_node = pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
14803
15044
  statement = (pm_node_t *)rescue_node;
14804
15045
  }
@@ -14829,7 +15070,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14829
15070
 
14830
15071
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14831
15072
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14832
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
15073
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
14833
15074
  }
14834
15075
 
14835
15076
  pm_accepts_block_stack_pop(parser);
@@ -14839,6 +15080,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14839
15080
  }
14840
15081
 
14841
15082
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
+ parser->current_param_name = old_param_name;
14842
15084
  pm_parser_scope_pop(parser);
14843
15085
 
14844
15086
  return (pm_node_t *) pm_def_node_create(
@@ -14848,6 +15090,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14848
15090
  params,
14849
15091
  statements,
14850
15092
  &locals,
15093
+ locals_body_index,
14851
15094
  &def_keyword,
14852
15095
  &operator,
14853
15096
  &lparen,
@@ -14866,18 +15109,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14866
15109
 
14867
15110
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14868
15111
  lparen = parser->previous;
14869
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_DEFINED_EXPRESSION);
15112
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_DEFINED_EXPRESSION);
14870
15113
 
14871
15114
  if (parser->recovering) {
14872
15115
  rparen = not_provided(parser);
14873
15116
  } else {
15117
+ accept1(parser, PM_TOKEN_NEWLINE);
14874
15118
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14875
15119
  rparen = parser->previous;
14876
15120
  }
14877
15121
  } else {
14878
15122
  lparen = not_provided(parser);
14879
15123
  rparen = not_provided(parser);
14880
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_DEFINED_EXPRESSION);
15124
+ expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_DEFINED_EXPRESSION);
14881
15125
  }
14882
15126
 
14883
15127
  return (pm_node_t *) pm_defined_node_create(
@@ -14889,6 +15133,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14889
15133
  );
14890
15134
  }
14891
15135
  case PM_TOKEN_KEYWORD_END_UPCASE: {
15136
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
15137
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
15138
+ }
15139
+
14892
15140
  parser_lex(parser);
14893
15141
  pm_token_t keyword = parser->previous;
14894
15142
 
@@ -14911,7 +15159,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14911
15159
  pm_token_t for_keyword = parser->previous;
14912
15160
  pm_node_t *index;
14913
15161
 
14914
- pm_parser_scope_push_transparent(parser);
14915
15162
  context_push(parser, PM_CONTEXT_FOR_INDEX);
14916
15163
 
14917
15164
  // First, parse out the first index expression.
@@ -14920,12 +15167,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14920
15167
  pm_node_t *name = NULL;
14921
15168
 
14922
15169
  if (token_begins_expression_p(parser->current.type)) {
14923
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15170
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14924
15171
  }
14925
15172
 
14926
15173
  index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
14927
15174
  } else if (token_begins_expression_p(parser->current.type)) {
14928
- index = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
15175
+ index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
14929
15176
  } else {
14930
15177
  pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
14931
15178
  index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
@@ -14939,13 +15186,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14939
15186
  }
14940
15187
 
14941
15188
  context_pop(parser);
14942
- pm_parser_scope_pop(parser);
14943
15189
  pm_do_loop_stack_push(parser, true);
14944
15190
 
14945
15191
  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
14946
15192
  pm_token_t in_keyword = parser->previous;
14947
15193
 
14948
- pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
15194
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_FOR_COLLECTION);
14949
15195
  pm_do_loop_stack_pop(parser);
14950
15196
 
14951
15197
  pm_token_t do_keyword;
@@ -14959,10 +15205,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14959
15205
  pm_statements_node_t *statements = NULL;
14960
15206
 
14961
15207
  if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
14962
- pm_parser_scope_push_transparent(parser);
14963
15208
  statements = parse_statements(parser, PM_CONTEXT_FOR);
14964
15209
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
14965
- pm_parser_scope_pop(parser);
14966
15210
  }
14967
15211
 
14968
15212
  return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
@@ -14971,6 +15215,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14971
15215
  parser_lex(parser);
14972
15216
  return parse_conditional(parser, PM_CONTEXT_IF);
14973
15217
  case PM_TOKEN_KEYWORD_UNDEF: {
15218
+ if (binding_power != PM_BINDING_POWER_STATEMENT) {
15219
+ pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
15220
+ }
15221
+
14974
15222
  parser_lex(parser);
14975
15223
  pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
14976
15224
  pm_node_t *name = parse_undef_argument(parser);
@@ -15011,7 +15259,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15011
15259
  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15012
15260
  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15013
15261
  } else {
15014
- receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_NOT_EXPRESSION);
15262
+ receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_NOT_EXPRESSION);
15015
15263
  pm_conditional_predicate(receiver);
15016
15264
 
15017
15265
  if (!parser->recovering) {
@@ -15021,7 +15269,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15021
15269
  }
15022
15270
  }
15023
15271
  } else {
15024
- receiver = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_NOT_EXPRESSION);
15272
+ receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_NOT_EXPRESSION);
15025
15273
  pm_conditional_predicate(receiver);
15026
15274
  }
15027
15275
 
@@ -15034,7 +15282,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15034
15282
  parser_lex(parser);
15035
15283
 
15036
15284
  pm_token_t module_keyword = parser->previous;
15037
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_MODULE_NAME);
15285
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_MODULE_NAME);
15038
15286
  pm_token_t name;
15039
15287
 
15040
15288
  // If we can recover from a syntax error that occurred while parsing
@@ -15061,6 +15309,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15061
15309
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15062
15310
  }
15063
15311
 
15312
+ pm_constant_id_t old_param_name = parser->current_param_name;
15313
+ parser->current_param_name = 0;
15064
15314
  pm_parser_scope_push(parser, true);
15065
15315
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15066
15316
  pm_node_t *statements = NULL;
@@ -15073,11 +15323,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15073
15323
 
15074
15324
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15075
15325
  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15076
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements);
15326
+ statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
15077
15327
  }
15078
15328
 
15079
15329
  pm_constant_id_list_t locals = parser->current_scope->locals;
15080
15330
  pm_parser_scope_pop(parser);
15331
+ parser->current_param_name = old_param_name;
15081
15332
 
15082
15333
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15083
15334
 
@@ -15107,7 +15358,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15107
15358
  parser_lex(parser);
15108
15359
  pm_token_t keyword = parser->previous;
15109
15360
 
15110
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15361
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15111
15362
  pm_do_loop_stack_pop(parser);
15112
15363
 
15113
15364
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
@@ -15128,7 +15379,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15128
15379
  parser_lex(parser);
15129
15380
  pm_token_t keyword = parser->previous;
15130
15381
 
15131
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15382
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15132
15383
  pm_do_loop_stack_pop(parser);
15133
15384
 
15134
15385
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
@@ -15146,7 +15397,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15146
15397
  }
15147
15398
  case PM_TOKEN_PERCENT_LOWER_I: {
15148
15399
  parser_lex(parser);
15149
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15400
+ pm_token_t opening = parser->previous;
15401
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15150
15402
 
15151
15403
  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15152
15404
  accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15161,14 +15413,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15161
15413
  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
15162
15414
  }
15163
15415
 
15164
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
15165
- pm_array_node_close_set(array, &parser->previous);
15416
+ pm_token_t closing = parser->current;
15417
+ if (match1(parser, PM_TOKEN_EOF)) {
15418
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
15419
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15420
+ } else {
15421
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
15422
+ }
15423
+ pm_array_node_close_set(array, &closing);
15166
15424
 
15167
15425
  return (pm_node_t *) array;
15168
15426
  }
15169
15427
  case PM_TOKEN_PERCENT_UPPER_I: {
15170
15428
  parser_lex(parser);
15171
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15429
+ pm_token_t opening = parser->previous;
15430
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15172
15431
 
15173
15432
  // This is the current node that we are parsing that will be added to the
15174
15433
  // list of elements.
@@ -15308,14 +15567,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15308
15567
  pm_array_node_elements_append(array, current);
15309
15568
  }
15310
15569
 
15311
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
15312
- pm_array_node_close_set(array, &parser->previous);
15570
+ pm_token_t closing = parser->current;
15571
+ if (match1(parser, PM_TOKEN_EOF)) {
15572
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
15573
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15574
+ } else {
15575
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
15576
+ }
15577
+ pm_array_node_close_set(array, &closing);
15313
15578
 
15314
15579
  return (pm_node_t *) array;
15315
15580
  }
15316
15581
  case PM_TOKEN_PERCENT_LOWER_W: {
15317
15582
  parser_lex(parser);
15318
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15583
+ pm_token_t opening = parser->previous;
15584
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15319
15585
 
15320
15586
  // skip all leading whitespaces
15321
15587
  accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15335,28 +15601,40 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15335
15601
  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
15336
15602
  }
15337
15603
 
15338
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
15339
- pm_array_node_close_set(array, &parser->previous);
15604
+ pm_token_t closing = parser->current;
15605
+ if (match1(parser, PM_TOKEN_EOF)) {
15606
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
15607
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15608
+ } else {
15609
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
15610
+ }
15340
15611
 
15612
+ pm_array_node_close_set(array, &closing);
15341
15613
  return (pm_node_t *) array;
15342
15614
  }
15343
15615
  case PM_TOKEN_PERCENT_UPPER_W: {
15344
15616
  parser_lex(parser);
15345
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
15617
+ pm_token_t opening = parser->previous;
15618
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
15346
15619
 
15347
- // This is the current node that we are parsing that will be added to the
15348
- // list of elements.
15620
+ // This is the current node that we are parsing that will be added
15621
+ // to the list of elements.
15349
15622
  pm_node_t *current = NULL;
15350
15623
 
15351
15624
  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15352
15625
  switch (parser->current.type) {
15353
15626
  case PM_TOKEN_WORDS_SEP: {
15627
+ // Reset the explicit encoding if we hit a separator
15628
+ // since each element can have its own encoding.
15629
+ parser->explicit_encoding = NULL;
15630
+
15354
15631
  if (current == NULL) {
15355
- // If we hit a separator before we have any content, then we don't
15356
- // need to do anything.
15632
+ // If we hit a separator before we have any content,
15633
+ // then we don't need to do anything.
15357
15634
  } else {
15358
- // If we hit a separator after we've hit content, then we need to
15359
- // append that content to the list and reset the current node.
15635
+ // If we hit a separator after we've hit content,
15636
+ // then we need to append that content to the list
15637
+ // and reset the current node.
15360
15638
  pm_array_node_elements_append(array, current);
15361
15639
  current = NULL;
15362
15640
  }
@@ -15369,22 +15647,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15369
15647
  pm_token_t closing = not_provided(parser);
15370
15648
 
15371
15649
  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
15650
+ pm_node_flag_set(string, parse_unescaped_encoding(parser));
15372
15651
  parser_lex(parser);
15373
15652
 
15374
15653
  if (current == NULL) {
15375
- // If we hit content and the current node is NULL, then this is
15376
- // the first string content we've seen. In that case we're going
15377
- // to create a new string node and set that to the current.
15654
+ // If we hit content and the current node is NULL,
15655
+ // then this is the first string content we've seen.
15656
+ // In that case we're going to create a new string
15657
+ // node and set that to the current.
15378
15658
  current = string;
15379
15659
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15380
- // If we hit string content and the current node is an
15381
- // interpolated string, then we need to append the string content
15382
- // to the list of child nodes.
15660
+ // If we hit string content and the current node is
15661
+ // an interpolated string, then we need to append
15662
+ // the string content to the list of child nodes.
15383
15663
  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
15384
15664
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15385
- // If we hit string content and the current node is a string node,
15386
- // then we need to convert the current node into an interpolated
15387
- // string and add the string content to the list of child nodes.
15665
+ // If we hit string content and the current node is
15666
+ // a string node, then we need to convert the
15667
+ // current node into an interpolated string and add
15668
+ // the string content to the list of child nodes.
15388
15669
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15389
15670
  pm_interpolated_string_node_append(interpolated, current);
15390
15671
  pm_interpolated_string_node_append(interpolated, string);
@@ -15397,24 +15678,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15397
15678
  }
15398
15679
  case PM_TOKEN_EMBVAR: {
15399
15680
  if (current == NULL) {
15400
- // If we hit an embedded variable and the current node is NULL,
15401
- // then this is the start of a new string. We'll set the current
15402
- // node to a new interpolated string.
15681
+ // If we hit an embedded variable and the current
15682
+ // node is NULL, then this is the start of a new
15683
+ // string. We'll set the current node to a new
15684
+ // interpolated string.
15403
15685
  pm_token_t opening = not_provided(parser);
15404
15686
  pm_token_t closing = not_provided(parser);
15405
15687
  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15406
15688
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15407
- // If we hit an embedded variable and the current node is a string
15408
- // node, then we'll convert the current into an interpolated
15409
- // string and add the string node to the list of parts.
15689
+ // If we hit an embedded variable and the current
15690
+ // node is a string node, then we'll convert the
15691
+ // current into an interpolated string and add the
15692
+ // string node to the list of parts.
15410
15693
  pm_token_t opening = not_provided(parser);
15411
15694
  pm_token_t closing = not_provided(parser);
15412
15695
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15413
15696
  pm_interpolated_string_node_append(interpolated, current);
15414
15697
  current = (pm_node_t *) interpolated;
15415
15698
  } else {
15416
- // If we hit an embedded variable and the current node is an
15417
- // interpolated string, then we'll just add the embedded variable.
15699
+ // If we hit an embedded variable and the current
15700
+ // node is an interpolated string, then we'll just
15701
+ // add the embedded variable.
15418
15702
  }
15419
15703
 
15420
15704
  pm_node_t *part = parse_string_part(parser);
@@ -15423,25 +15707,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15423
15707
  }
15424
15708
  case PM_TOKEN_EMBEXPR_BEGIN: {
15425
15709
  if (current == NULL) {
15426
- // If we hit an embedded expression and the current node is NULL,
15427
- // then this is the start of a new string. We'll set the current
15428
- // node to a new interpolated string.
15710
+ // If we hit an embedded expression and the current
15711
+ // node is NULL, then this is the start of a new
15712
+ // string. We'll set the current node to a new
15713
+ // interpolated string.
15429
15714
  pm_token_t opening = not_provided(parser);
15430
15715
  pm_token_t closing = not_provided(parser);
15431
15716
  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15432
15717
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15433
- // If we hit an embedded expression and the current node is a
15434
- // string node, then we'll convert the current into an
15435
- // interpolated string and add the string node to the list of
15436
- // parts.
15718
+ // If we hit an embedded expression and the current
15719
+ // node is a string node, then we'll convert the
15720
+ // current into an interpolated string and add the
15721
+ // string node to the list of parts.
15437
15722
  pm_token_t opening = not_provided(parser);
15438
15723
  pm_token_t closing = not_provided(parser);
15439
15724
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15440
15725
  pm_interpolated_string_node_append(interpolated, current);
15441
15726
  current = (pm_node_t *) interpolated;
15442
15727
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15443
- // If we hit an embedded expression and the current node is an
15444
- // interpolated string, then we'll just continue on.
15728
+ // If we hit an embedded expression and the current
15729
+ // node is an interpolated string, then we'll just
15730
+ // continue on.
15445
15731
  } else {
15446
15732
  assert(false && "unreachable");
15447
15733
  }
@@ -15462,9 +15748,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15462
15748
  pm_array_node_elements_append(array, current);
15463
15749
  }
15464
15750
 
15465
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
15466
- pm_array_node_close_set(array, &parser->previous);
15751
+ pm_token_t closing = parser->current;
15752
+ if (match1(parser, PM_TOKEN_EOF)) {
15753
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
15754
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15755
+ } else {
15756
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
15757
+ }
15467
15758
 
15759
+ pm_array_node_close_set(array, &closing);
15468
15760
  return (pm_node_t *) array;
15469
15761
  }
15470
15762
  case PM_TOKEN_REGEXP_BEGIN: {
@@ -15527,8 +15819,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15527
15819
  }
15528
15820
  }
15529
15821
 
15530
- expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
15531
- pm_interpolated_regular_expression_node_closing_set(node, &parser->previous);
15822
+ pm_token_t closing = parser->current;
15823
+ if (match1(parser, PM_TOKEN_EOF)) {
15824
+ pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
15825
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15826
+ } else {
15827
+ expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
15828
+ }
15829
+ pm_interpolated_regular_expression_node_closing_set(node, &closing);
15532
15830
 
15533
15831
  return (pm_node_t *) node;
15534
15832
  }
@@ -15566,8 +15864,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15566
15864
  pm_token_t content = parser->current;
15567
15865
  parser_lex(parser);
15568
15866
 
15569
- if (accept1(parser, PM_TOKEN_STRING_END)) {
15570
- return (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
15867
+ if (match1(parser, PM_TOKEN_STRING_END)) {
15868
+ pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
15869
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
15870
+ parser_lex(parser);
15871
+ return node;
15571
15872
  }
15572
15873
 
15573
15874
  // If we get here, then we have interpolation so we'll need to
@@ -15576,7 +15877,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15576
15877
 
15577
15878
  pm_token_t opening = not_provided(parser);
15578
15879
  pm_token_t closing = not_provided(parser);
15880
+
15579
15881
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
15882
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
15580
15883
 
15581
15884
  pm_interpolated_xstring_node_append(node, part);
15582
15885
  } else {
@@ -15593,8 +15896,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15593
15896
  }
15594
15897
  }
15595
15898
 
15596
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
15597
- pm_interpolated_xstring_node_closing_set(node, &parser->previous);
15899
+ pm_token_t closing = parser->current;
15900
+ if (match1(parser, PM_TOKEN_EOF)) {
15901
+ pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
15902
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15903
+ } else {
15904
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
15905
+ }
15906
+ pm_interpolated_xstring_node_closing_set(node, &closing);
15907
+
15598
15908
  return (pm_node_t *) node;
15599
15909
  }
15600
15910
  case PM_TOKEN_USTAR: {
@@ -15611,7 +15921,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15611
15921
  pm_node_t *name = NULL;
15612
15922
 
15613
15923
  if (token_begins_expression_p(parser->current.type)) {
15614
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15924
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15615
15925
  }
15616
15926
 
15617
15927
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
@@ -15626,7 +15936,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15626
15936
  parser_lex(parser);
15627
15937
 
15628
15938
  pm_token_t operator = parser->previous;
15629
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_BANG);
15939
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
15630
15940
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
15631
15941
 
15632
15942
  pm_conditional_predicate(receiver);
@@ -15636,7 +15946,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15636
15946
  parser_lex(parser);
15637
15947
 
15638
15948
  pm_token_t operator = parser->previous;
15639
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_TILDE);
15949
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
15640
15950
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
15641
15951
 
15642
15952
  return (pm_node_t *) node;
@@ -15645,7 +15955,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15645
15955
  parser_lex(parser);
15646
15956
 
15647
15957
  pm_token_t operator = parser->previous;
15648
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_MINUS);
15958
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15649
15959
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
15650
15960
 
15651
15961
  return (pm_node_t *) node;
@@ -15654,11 +15964,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15654
15964
  parser_lex(parser);
15655
15965
 
15656
15966
  pm_token_t operator = parser->previous;
15657
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_MINUS);
15967
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15658
15968
 
15659
15969
  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
15660
15970
  pm_token_t exponent_operator = parser->previous;
15661
- pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, PM_ERR_EXPECT_ARGUMENT);
15971
+ pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, PM_ERR_EXPECT_ARGUMENT);
15662
15972
  node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent);
15663
15973
  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
15664
15974
  } else {
@@ -15686,7 +15996,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15686
15996
 
15687
15997
  pm_token_t operator = parser->previous;
15688
15998
  pm_parser_scope_push(parser, false);
15689
- pm_block_parameters_node_t *params;
15999
+ pm_block_parameters_node_t *block_parameters;
15690
16000
 
15691
16001
  switch (parser->current.type) {
15692
16002
  case PM_TOKEN_PARENTHESIS_LEFT: {
@@ -15695,31 +16005,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15695
16005
  parser_lex(parser);
15696
16006
 
15697
16007
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15698
- params = pm_block_parameters_node_create(parser, NULL, &opening);
16008
+ block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
15699
16009
  } else {
15700
- params = parse_block_parameters(parser, false, &opening, true);
16010
+ block_parameters = parse_block_parameters(parser, false, &opening, true);
15701
16011
  }
15702
16012
 
15703
16013
  accept1(parser, PM_TOKEN_NEWLINE);
15704
16014
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
15705
16015
 
15706
- pm_block_parameters_node_closing_set(params, &parser->previous);
16016
+ pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15707
16017
  break;
15708
16018
  }
15709
16019
  case PM_CASE_PARAMETER: {
15710
16020
  parser->current_scope->explicit_params = true;
15711
16021
  pm_accepts_block_stack_push(parser, false);
15712
16022
  pm_token_t opening = not_provided(parser);
15713
- params = parse_block_parameters(parser, false, &opening, true);
16023
+ block_parameters = parse_block_parameters(parser, false, &opening, true);
15714
16024
  pm_accepts_block_stack_pop(parser);
15715
16025
  break;
15716
16026
  }
15717
16027
  default: {
15718
- params = NULL;
16028
+ block_parameters = NULL;
15719
16029
  break;
15720
16030
  }
15721
16031
  }
15722
16032
 
16033
+ uint32_t locals_body_index = 0;
16034
+
16035
+ if (block_parameters) {
16036
+ locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
+ }
16038
+
15723
16039
  pm_token_t opening;
15724
16040
  pm_node_t *body = NULL;
15725
16041
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -15743,22 +16059,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15743
16059
 
15744
16060
  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15745
16061
  assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
15746
- body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body);
16062
+ body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
15747
16063
  }
15748
16064
 
15749
16065
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
15750
16066
  }
15751
16067
 
16068
+ pm_node_t *parameters = (pm_node_t *) block_parameters;
16069
+ uint8_t maximum = parser->current_scope->numbered_parameters;
16070
+
16071
+ if (parameters == NULL && (maximum > 0)) {
16072
+ parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
+ locals_body_index = maximum;
16074
+ }
16075
+
15752
16076
  pm_constant_id_list_t locals = parser->current_scope->locals;
15753
16077
  pm_parser_scope_pop(parser);
15754
16078
  pm_accepts_block_stack_pop(parser);
15755
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
16079
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
15756
16080
  }
15757
16081
  case PM_TOKEN_UPLUS: {
15758
16082
  parser_lex(parser);
15759
16083
 
15760
16084
  pm_token_t operator = parser->previous;
15761
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, PM_ERR_UNARY_RECEIVER_PLUS);
16085
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
15762
16086
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
15763
16087
 
15764
16088
  return (pm_node_t *) node;
@@ -15781,14 +16105,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15781
16105
  }
15782
16106
 
15783
16107
  static inline pm_node_t *
15784
- parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15785
- pm_node_t *value = parse_value_expression(parser, binding_power, diag_id);
16108
+ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16109
+ pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
15786
16110
 
15787
16111
  // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15788
16112
  if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15789
16113
  pm_token_t rescue = parser->current;
15790
16114
  parser_lex(parser);
15791
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16115
+ pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
15792
16116
 
15793
16117
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15794
16118
  }
@@ -15798,8 +16122,8 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm
15798
16122
 
15799
16123
 
15800
16124
  static inline pm_node_t *
15801
- parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15802
- pm_node_t *value = parse_starred_expression(parser, binding_power, diag_id);
16125
+ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16126
+ pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
15803
16127
 
15804
16128
  bool is_single_value = true;
15805
16129
  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
@@ -15811,7 +16135,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
15811
16135
  value = (pm_node_t *) array;
15812
16136
 
15813
16137
  while (accept1(parser, PM_TOKEN_COMMA)) {
15814
- pm_node_t *element = parse_starred_expression(parser, binding_power, PM_ERR_ARRAY_ELEMENT);
16138
+ pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT);
15815
16139
  pm_array_node_elements_append(array, element);
15816
16140
  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
15817
16141
  }
@@ -15821,7 +16145,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
15821
16145
  if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15822
16146
  pm_token_t rescue = parser->current;
15823
16147
  parser_lex(parser);
15824
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16148
+ pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
15825
16149
 
15826
16150
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15827
16151
  }
@@ -15879,7 +16203,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15879
16203
  pm_string_list_t named_captures = { 0 };
15880
16204
  pm_node_t *result;
15881
16205
 
15882
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
16206
+ if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
15883
16207
  // Since we should not create a MatchWriteNode when all capture names
15884
16208
  // are invalid, creating a MatchWriteNode is delayed here.
15885
16209
  pm_match_write_node_t *match = NULL;
@@ -15913,6 +16237,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15913
16237
  if (memory == NULL) abort();
15914
16238
 
15915
16239
  memcpy(memory, source, length);
16240
+ // This silences clang analyzer warning about leak of memory pointed by `memory`.
16241
+ // NOLINTNEXTLINE(clang-analyzer-*)
15916
16242
  name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
15917
16243
 
15918
16244
  if (pm_token_is_numbered_parameter(source, source + length)) {
@@ -15960,7 +16286,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15960
16286
  }
15961
16287
 
15962
16288
  static inline pm_node_t *
15963
- parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
16289
+ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call) {
15964
16290
  pm_token_t token = parser->current;
15965
16291
 
15966
16292
  switch (token.type) {
@@ -15979,7 +16305,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15979
16305
  /* fallthrough */
15980
16306
  case PM_CASE_WRITABLE: {
15981
16307
  parser_lex(parser);
15982
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16308
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15983
16309
  return parse_write(parser, node, &token, value);
15984
16310
  }
15985
16311
  case PM_SPLAT_NODE: {
@@ -15987,7 +16313,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15987
16313
  pm_multi_target_node_targets_append(parser, multi_target, node);
15988
16314
 
15989
16315
  parser_lex(parser);
15990
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16316
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15991
16317
  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
15992
16318
  }
15993
16319
  default:
@@ -16009,7 +16335,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16009
16335
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16010
16336
  parser_lex(parser);
16011
16337
 
16012
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16338
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16013
16339
  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
16014
16340
 
16015
16341
  pm_node_destroy(parser, node);
@@ -16018,7 +16344,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16018
16344
  case PM_CLASS_VARIABLE_READ_NODE: {
16019
16345
  parser_lex(parser);
16020
16346
 
16021
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16347
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16022
16348
  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16023
16349
 
16024
16350
  pm_node_destroy(parser, node);
@@ -16027,13 +16353,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16027
16353
  case PM_CONSTANT_PATH_NODE: {
16028
16354
  parser_lex(parser);
16029
16355
 
16030
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16356
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16031
16357
  return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16032
16358
  }
16033
16359
  case PM_CONSTANT_READ_NODE: {
16034
16360
  parser_lex(parser);
16035
16361
 
16036
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16362
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16037
16363
  pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16038
16364
 
16039
16365
  pm_node_destroy(parser, node);
@@ -16042,7 +16368,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16042
16368
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16043
16369
  parser_lex(parser);
16044
16370
 
16045
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16371
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16046
16372
  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16047
16373
 
16048
16374
  pm_node_destroy(parser, node);
@@ -16052,7 +16378,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16052
16378
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16053
16379
  parser_lex(parser);
16054
16380
 
16055
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16381
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16056
16382
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16057
16383
 
16058
16384
  pm_node_destroy(parser, node);
@@ -16070,7 +16396,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16070
16396
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16071
16397
 
16072
16398
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16073
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16399
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16074
16400
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16075
16401
 
16076
16402
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16081,7 +16407,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16081
16407
  // this is an aref expression, and we can transform it into
16082
16408
  // an aset expression.
16083
16409
  if (pm_call_node_index_p(cast)) {
16084
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16410
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16085
16411
  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
16086
16412
  }
16087
16413
 
@@ -16093,7 +16419,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16093
16419
  }
16094
16420
 
16095
16421
  parse_call_operator_write(parser, cast, &token);
16096
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16422
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16097
16423
  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
16098
16424
  }
16099
16425
  case PM_MULTI_WRITE_NODE: {
@@ -16120,7 +16446,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16120
16446
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16121
16447
  parser_lex(parser);
16122
16448
 
16123
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16449
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16124
16450
  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
16125
16451
 
16126
16452
  pm_node_destroy(parser, node);
@@ -16129,7 +16455,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16129
16455
  case PM_CLASS_VARIABLE_READ_NODE: {
16130
16456
  parser_lex(parser);
16131
16457
 
16132
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16458
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16133
16459
  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16134
16460
 
16135
16461
  pm_node_destroy(parser, node);
@@ -16138,13 +16464,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16138
16464
  case PM_CONSTANT_PATH_NODE: {
16139
16465
  parser_lex(parser);
16140
16466
 
16141
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16467
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16142
16468
  return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16143
16469
  }
16144
16470
  case PM_CONSTANT_READ_NODE: {
16145
16471
  parser_lex(parser);
16146
16472
 
16147
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16473
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16148
16474
  pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16149
16475
 
16150
16476
  pm_node_destroy(parser, node);
@@ -16153,7 +16479,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16153
16479
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16154
16480
  parser_lex(parser);
16155
16481
 
16156
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16482
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16157
16483
  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16158
16484
 
16159
16485
  pm_node_destroy(parser, node);
@@ -16163,7 +16489,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16163
16489
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16164
16490
  parser_lex(parser);
16165
16491
 
16166
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16492
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16167
16493
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16168
16494
 
16169
16495
  pm_node_destroy(parser, node);
@@ -16181,7 +16507,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16181
16507
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16182
16508
 
16183
16509
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16184
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16510
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16185
16511
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16186
16512
 
16187
16513
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16192,7 +16518,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16192
16518
  // this is an aref expression, and we can transform it into
16193
16519
  // an aset expression.
16194
16520
  if (pm_call_node_index_p(cast)) {
16195
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16521
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16196
16522
  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
16197
16523
  }
16198
16524
 
@@ -16204,7 +16530,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16204
16530
  }
16205
16531
 
16206
16532
  parse_call_operator_write(parser, cast, &token);
16207
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16533
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16208
16534
  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
16209
16535
  }
16210
16536
  case PM_MULTI_WRITE_NODE: {
@@ -16241,7 +16567,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16241
16567
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16242
16568
  parser_lex(parser);
16243
16569
 
16244
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16570
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16245
16571
  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
16246
16572
 
16247
16573
  pm_node_destroy(parser, node);
@@ -16250,7 +16576,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16250
16576
  case PM_CLASS_VARIABLE_READ_NODE: {
16251
16577
  parser_lex(parser);
16252
16578
 
16253
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16579
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16254
16580
  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16255
16581
 
16256
16582
  pm_node_destroy(parser, node);
@@ -16259,13 +16585,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16259
16585
  case PM_CONSTANT_PATH_NODE: {
16260
16586
  parser_lex(parser);
16261
16587
 
16262
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16588
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16263
16589
  return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16264
16590
  }
16265
16591
  case PM_CONSTANT_READ_NODE: {
16266
16592
  parser_lex(parser);
16267
16593
 
16268
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16594
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16269
16595
  pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16270
16596
 
16271
16597
  pm_node_destroy(parser, node);
@@ -16274,7 +16600,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16274
16600
  case PM_INSTANCE_VARIABLE_READ_NODE: {
16275
16601
  parser_lex(parser);
16276
16602
 
16277
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16603
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16278
16604
  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16279
16605
 
16280
16606
  pm_node_destroy(parser, node);
@@ -16284,7 +16610,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16284
16610
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
16285
16611
  parser_lex(parser);
16286
16612
 
16287
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16613
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16288
16614
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16289
16615
 
16290
16616
  pm_node_destroy(parser, node);
@@ -16302,7 +16628,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16302
16628
  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16303
16629
 
16304
16630
  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16305
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16631
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16306
16632
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16307
16633
 
16308
16634
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -16313,7 +16639,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16313
16639
  // this is an aref expression, and we can transform it into
16314
16640
  // an aset expression.
16315
16641
  if (pm_call_node_index_p(cast)) {
16316
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16642
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16317
16643
  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
16318
16644
  }
16319
16645
 
@@ -16325,7 +16651,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16325
16651
  }
16326
16652
 
16327
16653
  parse_call_operator_write(parser, cast, &token);
16328
- pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16654
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16329
16655
  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
16330
16656
  }
16331
16657
  case PM_MULTI_WRITE_NODE: {
@@ -16347,14 +16673,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16347
16673
  case PM_TOKEN_KEYWORD_AND: {
16348
16674
  parser_lex(parser);
16349
16675
 
16350
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16676
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16351
16677
  return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
16352
16678
  }
16353
16679
  case PM_TOKEN_KEYWORD_OR:
16354
16680
  case PM_TOKEN_PIPE_PIPE: {
16355
16681
  parser_lex(parser);
16356
16682
 
16357
- pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16683
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16358
16684
  return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
16359
16685
  }
16360
16686
  case PM_TOKEN_EQUAL_TILDE: {
@@ -16366,7 +16692,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16366
16692
  //
16367
16693
  // In this case, `foo` should be a method call and not a local yet.
16368
16694
  parser_lex(parser);
16369
- pm_node_t *argument = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16695
+ pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16370
16696
 
16371
16697
  // By default, we're going to create a call node and then return it.
16372
16698
  pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument);
@@ -16451,7 +16777,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16451
16777
  case PM_TOKEN_STAR_STAR: {
16452
16778
  parser_lex(parser);
16453
16779
 
16454
- pm_node_t *argument = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16780
+ pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16455
16781
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument);
16456
16782
  }
16457
16783
  case PM_TOKEN_AMPERSAND_DOT:
@@ -16462,7 +16788,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16462
16788
 
16463
16789
  // This if statement handles the foo.() syntax.
16464
16790
  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
16465
- parse_arguments_list(parser, &arguments, true);
16791
+ parse_arguments_list(parser, &arguments, true, false);
16466
16792
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
16467
16793
  }
16468
16794
 
@@ -16484,7 +16810,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16484
16810
  }
16485
16811
  }
16486
16812
 
16487
- parse_arguments_list(parser, &arguments, true);
16813
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16488
16814
  pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
16489
16815
 
16490
16816
  if (
@@ -16504,7 +16830,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16504
16830
 
16505
16831
  pm_node_t *right = NULL;
16506
16832
  if (token_begins_expression_p(parser->current.type)) {
16507
- right = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16833
+ right = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16508
16834
  }
16509
16835
 
16510
16836
  return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
@@ -16513,14 +16839,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16513
16839
  pm_token_t keyword = parser->current;
16514
16840
  parser_lex(parser);
16515
16841
 
16516
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
16842
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
16517
16843
  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
16518
16844
  }
16519
16845
  case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
16520
16846
  pm_token_t keyword = parser->current;
16521
16847
  parser_lex(parser);
16522
16848
 
16523
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16849
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16524
16850
  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
16525
16851
  }
16526
16852
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
@@ -16528,7 +16854,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16528
16854
  pm_statements_node_t *statements = pm_statements_node_create(parser);
16529
16855
  pm_statements_node_body_append(statements, node);
16530
16856
 
16531
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16857
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16532
16858
  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16533
16859
  }
16534
16860
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
@@ -16536,13 +16862,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16536
16862
  pm_statements_node_t *statements = pm_statements_node_create(parser);
16537
16863
  pm_statements_node_body_append(statements, node);
16538
16864
 
16539
- pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16865
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16540
16866
  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16541
16867
  }
16542
16868
  case PM_TOKEN_QUESTION_MARK: {
16543
16869
  pm_token_t qmark = parser->current;
16544
16870
  parser_lex(parser);
16545
- pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_TRUE);
16871
+ pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_TRUE);
16546
16872
 
16547
16873
  if (parser->recovering) {
16548
16874
  // If parsing the true expression of this ternary resulted in a syntax
@@ -16561,7 +16887,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16561
16887
  expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
16562
16888
 
16563
16889
  pm_token_t colon = parser->previous;
16564
- pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_FALSE);
16890
+ pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_FALSE);
16565
16891
 
16566
16892
  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16567
16893
  }
@@ -16587,7 +16913,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16587
16913
  pm_token_t message = parser->previous;
16588
16914
  pm_arguments_t arguments = { 0 };
16589
16915
 
16590
- parse_arguments_list(parser, &arguments, true);
16916
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16591
16917
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16592
16918
  } else {
16593
16919
  // Otherwise, this is a constant path. That would look like Foo::Bar.
@@ -16612,7 +16938,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16612
16938
  // If we have an identifier following a '::' operator, then it is for
16613
16939
  // sure a method call.
16614
16940
  pm_arguments_t arguments = { 0 };
16615
- parse_arguments_list(parser, &arguments, true);
16941
+ parse_arguments_list(parser, &arguments, true, accepts_command_call);
16616
16942
  pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16617
16943
 
16618
16944
  // If this is followed by a comma then it is a multiple assignment.
@@ -16626,7 +16952,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16626
16952
  // If we have a parenthesis following a '::' operator, then it is the
16627
16953
  // method call shorthand. That would look like Foo::(bar).
16628
16954
  pm_arguments_t arguments = { 0 };
16629
- parse_arguments_list(parser, &arguments, true);
16955
+ parse_arguments_list(parser, &arguments, true, false);
16630
16956
 
16631
16957
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
16632
16958
  }
@@ -16640,7 +16966,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16640
16966
  case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
16641
16967
  parser_lex(parser);
16642
16968
  accept1(parser, PM_TOKEN_NEWLINE);
16643
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
16969
+ pm_node_t *value = parse_expression(parser, binding_power, true, PM_ERR_RESCUE_MODIFIER_VALUE);
16644
16970
 
16645
16971
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
16646
16972
  }
@@ -16736,16 +17062,39 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16736
17062
  * determine if they need to perform additional cleanup.
16737
17063
  */
16738
17064
  static pm_node_t *
16739
- parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
17065
+ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16740
17066
  pm_token_t recovery = parser->previous;
16741
- pm_node_t *node = parse_expression_prefix(parser, binding_power);
17067
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
16742
17068
 
16743
- // If we found a syntax error, then the type of node returned by
16744
- // parse_expression_prefix is going to be a missing node. In that case we need
16745
- // to add the error message to the parser's error list.
16746
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
16747
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
16748
- return node;
17069
+ switch (PM_NODE_TYPE(node)) {
17070
+ case PM_MISSING_NODE:
17071
+ // If we found a syntax error, then the type of node returned by
17072
+ // parse_expression_prefix is going to be a missing node. In that
17073
+ // case we need to add the error message to the parser's error list.
17074
+ pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17075
+ return node;
17076
+ case PM_PRE_EXECUTION_NODE:
17077
+ case PM_POST_EXECUTION_NODE:
17078
+ case PM_ALIAS_GLOBAL_VARIABLE_NODE:
17079
+ case PM_ALIAS_METHOD_NODE:
17080
+ case PM_UNDEF_NODE:
17081
+ // These expressions are statements, and cannot be followed by
17082
+ // operators (except modifiers).
17083
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17084
+ return node;
17085
+ }
17086
+ break;
17087
+ case PM_RANGE_NODE:
17088
+ // Range operators are non-associative, so that it does not
17089
+ // associate with other range operators (i.e. `..1..` should be
17090
+ // rejected.) For this reason, we check such a case for unary ranges
17091
+ // here, and if so, it returns the node immediately,
17092
+ if ((((pm_range_node_t *) node)->left == NULL) && pm_binding_powers[parser->current.type].left >= PM_BINDING_POWER_RANGE) {
17093
+ return node;
17094
+ }
17095
+ break;
17096
+ default:
17097
+ break;
16749
17098
  }
16750
17099
 
16751
17100
  // Otherwise we'll look and see if the next token can be parsed as an infix
@@ -16756,12 +17105,68 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
16756
17105
  binding_power <= current_binding_powers.left &&
16757
17106
  current_binding_powers.binary
16758
17107
  ) {
16759
- node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right);
16760
- if (
16761
- current_binding_powers.nonassoc &&
16762
- current_binding_powers.right <= pm_binding_powers[parser->current.type].left
16763
- ) {
16764
- break;
17108
+ node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call);
17109
+ if (current_binding_powers.nonassoc) {
17110
+ bool endless_range_p = PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL;
17111
+ pm_binding_power_t left = endless_range_p ? PM_BINDING_POWER_TERM : current_binding_powers.left;
17112
+ if (
17113
+ left <= pm_binding_powers[parser->current.type].left ||
17114
+ // Exceptionally to operator precedences, '1.. & 2' is rejected.
17115
+ // '1.. || 2' is also an exception, but it is handled by the lexer.
17116
+ // (Here, parser->current is PM_TOKEN_PIPE, not PM_TOKEN_PIPE_PIPE).
17117
+ (endless_range_p && match1(parser, PM_TOKEN_AMPERSAND))
17118
+ ) {
17119
+ break;
17120
+ }
17121
+ }
17122
+ if (accepts_command_call) {
17123
+ // A command-style method call is only accepted on method chains.
17124
+ // Thus, we check whether the parsed node can continue method chains.
17125
+ // The method chain can continue if the parsed node is one of the following five kinds:
17126
+ // (1) index access: foo[1]
17127
+ // (2) attribute access: foo.bar
17128
+ // (3) method call with parenthesis: foo.bar(1)
17129
+ // (4) method call with a block: foo.bar do end
17130
+ // (5) constant path: foo::Bar
17131
+ switch (node->type) {
17132
+ case PM_CALL_NODE: {
17133
+ pm_call_node_t *cast = (pm_call_node_t *)node;
17134
+ if (
17135
+ // (1) foo[1]
17136
+ !(
17137
+ cast->call_operator_loc.start == NULL &&
17138
+ cast->message_loc.start != NULL &&
17139
+ cast->message_loc.start[0] == '[' &&
17140
+ cast->message_loc.end[-1] == ']'
17141
+ ) &&
17142
+ // (2) foo.bar
17143
+ !(
17144
+ cast->call_operator_loc.start != NULL &&
17145
+ cast->arguments == NULL &&
17146
+ cast->block == NULL &&
17147
+ cast->opening_loc.start == NULL
17148
+ ) &&
17149
+ // (3) foo.bar(1)
17150
+ !(
17151
+ cast->call_operator_loc.start != NULL &&
17152
+ cast->opening_loc.start != NULL
17153
+ ) &&
17154
+ // (4) foo.bar do end
17155
+ !(
17156
+ cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
17157
+ )
17158
+ ) {
17159
+ accepts_command_call = false;
17160
+ }
17161
+ break;
17162
+ }
17163
+ // (5) foo::Bar
17164
+ case PM_CONSTANT_PATH_NODE:
17165
+ break;
17166
+ default:
17167
+ accepts_command_call = false;
17168
+ break;
17169
+ }
16765
17170
  }
16766
17171
  }
16767
17172
 
@@ -16825,9 +17230,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16825
17230
  .error_list = { 0 },
16826
17231
  .current_scope = NULL,
16827
17232
  .current_context = NULL,
16828
- .encoding = pm_encoding_utf_8,
17233
+ .encoding = PM_ENCODING_UTF_8_ENTRY,
16829
17234
  .encoding_changed_callback = NULL,
16830
- .encoding_decode_callback = NULL,
16831
17235
  .encoding_comment_start = source,
16832
17236
  .lex_callback = NULL,
16833
17237
  .filepath_string = { 0 },
@@ -16836,11 +17240,13 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16836
17240
  .integer_base = 0,
16837
17241
  .current_string = PM_STRING_EMPTY,
16838
17242
  .start_line = 1,
17243
+ .explicit_encoding = NULL,
16839
17244
  .command_start = true,
16840
17245
  .recovering = false,
16841
17246
  .encoding_changed = false,
16842
17247
  .pattern_matching_newlines = false,
16843
17248
  .in_keyword_arg = false,
17249
+ .current_param_name = 0,
16844
17250
  .semantic_token_seen = false,
16845
17251
  .frozen_string_literal = false,
16846
17252
  .suppress_warnings = false
@@ -16875,9 +17281,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
16875
17281
  parser->filepath_string = options->filepath;
16876
17282
 
16877
17283
  // line option
16878
- if (options->line > 0) {
16879
- parser->start_line = options->line;
16880
- }
17284
+ parser->start_line = options->line;
16881
17285
 
16882
17286
  // encoding option
16883
17287
  size_t encoding_length = pm_string_length(&options->encoding);
@@ -16943,18 +17347,6 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch
16943
17347
  parser->encoding_changed_callback = callback;
16944
17348
  }
16945
17349
 
16946
- /**
16947
- * Register a callback that will be called when prism encounters a magic comment
16948
- * with an encoding referenced that it doesn't understand. The callback should
16949
- * return NULL if it also doesn't understand the encoding or it should return a
16950
- * pointer to a pm_encoding_t struct that contains the functions necessary to
16951
- * parse identifiers.
16952
- */
16953
- PRISM_EXPORTED_FUNCTION void
16954
- pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback) {
16955
- parser->encoding_decode_callback = callback;
16956
- }
16957
-
16958
17350
  /**
16959
17351
  * Free all of the memory associated with the comment list.
16960
17352
  */
@@ -17046,7 +17438,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
17046
17438
  PRISM_EXPORTED_FUNCTION void
17047
17439
  pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17048
17440
  pm_options_t options = { 0 };
17049
- if (data != NULL) pm_options_read(&options, data);
17441
+ pm_options_read(&options, data);
17050
17442
 
17051
17443
  pm_parser_t parser;
17052
17444
  pm_parser_init(&parser, source, size, &options);
@@ -17068,15 +17460,15 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
17068
17460
  PRISM_EXPORTED_FUNCTION void
17069
17461
  pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17070
17462
  pm_options_t options = { 0 };
17071
- if (data != NULL) pm_options_read(&options, data);
17463
+ pm_options_read(&options, data);
17072
17464
 
17073
17465
  pm_parser_t parser;
17074
17466
  pm_parser_init(&parser, source, size, &options);
17075
17467
 
17076
17468
  pm_node_t *node = pm_parse(&parser);
17077
17469
  pm_serialize_header(buffer);
17078
- pm_serialize_encoding(&parser.encoding, buffer);
17079
- pm_buffer_append_varint(buffer, parser.start_line);
17470
+ pm_serialize_encoding(parser.encoding, buffer);
17471
+ pm_buffer_append_varsint(buffer, parser.start_line);
17080
17472
  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
17081
17473
 
17082
17474
  pm_node_destroy(&parser, node);