prism 0.19.0 → 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/prism.c CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
164
164
 
165
165
  PRISM_ATTRIBUTE_UNUSED static void
166
166
  debug_token(pm_token_t * token) {
167
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
167
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
168
168
  }
169
169
 
170
170
  #endif
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
423
423
  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
424
424
  }
425
425
 
426
+ static inline bool
427
+ lex_state_arg_labeled_p(pm_parser_t *parser) {
428
+ return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
429
+ }
430
+
426
431
  static inline bool
427
432
  lex_state_arg_p(pm_parser_t *parser) {
428
433
  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
548
553
  */
549
554
  static inline void
550
555
  pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
551
- if (!parser->suppress_warnings) {
552
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
553
- }
556
+ pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
554
557
  }
555
558
 
556
559
  /**
@@ -813,6 +816,9 @@ typedef struct {
813
816
 
814
817
  /** The optional block attached to the call. */
815
818
  pm_node_t *block;
819
+
820
+ /** The flag indicating whether this arguments list has forwarding argument. */
821
+ bool has_forwarding;
816
822
  } pm_arguments_t;
817
823
 
818
824
  /**
@@ -884,6 +890,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
890
  node->flags &= (pm_node_flags_t) ~flag;
885
891
  }
886
892
 
893
+ /**
894
+ * Set the repeated parameter flag on the given node.
895
+ */
896
+ static inline void
897
+ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
898
+ assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
899
+ PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
900
+ PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
901
+ PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
902
+ PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
903
+ PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
904
+ PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
905
+ PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
906
+
907
+ pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
908
+ }
887
909
 
888
910
  /******************************************************************************/
889
911
  /* Node creation functions */
@@ -977,7 +999,7 @@ static inline void *
977
999
  pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
978
1000
  void *memory = calloc(1, size);
979
1001
  if (memory == NULL) {
980
- fprintf(stderr, "Failed to allocate %zu bytes\n", size);
1002
+ fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
981
1003
  abort();
982
1004
  }
983
1005
  return memory;
@@ -1325,7 +1347,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1325
1347
  pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
1326
1348
  const uint8_t *end;
1327
1349
 
1328
- if (value != NULL) {
1350
+ if (value != NULL && value->location.end > key->location.end) {
1329
1351
  end = value->location.end;
1330
1352
  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
1331
1353
  end = operator->end;
@@ -1333,6 +1355,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1333
1355
  end = key->location.end;
1334
1356
  }
1335
1357
 
1358
+ // Hash string keys will be frozen, so we can mark them as frozen here so
1359
+ // that the compiler picks them up and also when we check for static literal
1360
+ // on the keys it gets factored in.
1361
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
1362
+ key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
1363
+ }
1364
+
1336
1365
  // If the key and value of this assoc node are both static literals, then
1337
1366
  // we can mark this node as a static literal.
1338
1367
  pm_node_flags_t flags = 0;
@@ -1490,7 +1519,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1490
1519
  * Allocate and initialize a new BlockNode node.
1491
1520
  */
1492
1521
  static pm_block_node_t *
1493
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1522
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1494
1523
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1495
1524
 
1496
1525
  *node = (pm_block_node_t) {
@@ -1499,7 +1528,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
1499
1528
  .location = { .start = opening->start, .end = closing->end },
1500
1529
  },
1501
1530
  .locals = *locals,
1502
- .locals_body_index = locals_body_index,
1503
1531
  .parameters = parameters,
1504
1532
  .body = body,
1505
1533
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1645,12 +1673,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
1645
1673
  * in the various specializations of this function.
1646
1674
  */
1647
1675
  static pm_call_node_t *
1648
- pm_call_node_create(pm_parser_t *parser) {
1676
+ pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
1649
1677
  pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
1650
1678
 
1651
1679
  *node = (pm_call_node_t) {
1652
1680
  {
1653
1681
  .type = PM_CALL_NODE,
1682
+ .flags = flags,
1654
1683
  .location = PM_LOCATION_NULL_VALUE(parser),
1655
1684
  },
1656
1685
  .receiver = NULL,
@@ -1666,6 +1695,15 @@ pm_call_node_create(pm_parser_t *parser) {
1666
1695
  return node;
1667
1696
  }
1668
1697
 
1698
+ /**
1699
+ * Returns the value that the ignore visibility flag should be set to for the
1700
+ * given receiver.
1701
+ */
1702
+ static inline pm_node_flags_t
1703
+ pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
1704
+ return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
1705
+ }
1706
+
1669
1707
  /**
1670
1708
  * Allocate and initialize a new CallNode node from an aref or an aset
1671
1709
  * expression.
@@ -1674,7 +1712,7 @@ static pm_call_node_t *
1674
1712
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1675
1713
  pm_assert_value_expression(parser, receiver);
1676
1714
 
1677
- pm_call_node_t *node = pm_call_node_create(parser);
1715
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1678
1716
 
1679
1717
  node->base.location.start = receiver->location.start;
1680
1718
  node->base.location.end = pm_arguments_end(arguments);
@@ -1700,7 +1738,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1700
1738
  pm_assert_value_expression(parser, receiver);
1701
1739
  pm_assert_value_expression(parser, argument);
1702
1740
 
1703
- pm_call_node_t *node = pm_call_node_create(parser);
1741
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1704
1742
 
1705
1743
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
1706
1744
  node->base.location.end = MAX(receiver->location.end, argument->location.end);
@@ -1723,7 +1761,7 @@ static pm_call_node_t *
1723
1761
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1724
1762
  pm_assert_value_expression(parser, receiver);
1725
1763
 
1726
- pm_call_node_t *node = pm_call_node_create(parser);
1764
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1727
1765
 
1728
1766
  node->base.location.start = receiver->location.start;
1729
1767
  const uint8_t *end = pm_arguments_end(arguments);
@@ -1754,7 +1792,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1754
1792
  */
1755
1793
  static pm_call_node_t *
1756
1794
  pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
1757
- pm_call_node_t *node = pm_call_node_create(parser);
1795
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1758
1796
 
1759
1797
  node->base.location.start = message->start;
1760
1798
  node->base.location.end = pm_arguments_end(arguments);
@@ -1776,7 +1814,7 @@ static pm_call_node_t *
1776
1814
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1777
1815
  pm_assert_value_expression(parser, receiver);
1778
1816
 
1779
- pm_call_node_t *node = pm_call_node_create(parser);
1817
+ pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
1780
1818
 
1781
1819
  node->base.location.start = message->start;
1782
1820
  if (arguments->closing_loc.start != NULL) {
@@ -1802,7 +1840,7 @@ static pm_call_node_t *
1802
1840
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1803
1841
  pm_assert_value_expression(parser, receiver);
1804
1842
 
1805
- pm_call_node_t *node = pm_call_node_create(parser);
1843
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1806
1844
 
1807
1845
  node->base.location.start = receiver->location.start;
1808
1846
  node->base.location.end = pm_arguments_end(arguments);
@@ -1829,7 +1867,7 @@ static pm_call_node_t *
1829
1867
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1830
1868
  pm_assert_value_expression(parser, receiver);
1831
1869
 
1832
- pm_call_node_t *node = pm_call_node_create(parser);
1870
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1833
1871
 
1834
1872
  node->base.location.start = operator->start;
1835
1873
  node->base.location.end = receiver->location.end;
@@ -1847,7 +1885,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
1847
1885
  */
1848
1886
  static pm_call_node_t *
1849
1887
  pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1850
- pm_call_node_t *node = pm_call_node_create(parser);
1888
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1851
1889
 
1852
1890
  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
1853
1891
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
@@ -2167,11 +2205,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2167
2205
  static pm_index_target_node_t *
2168
2206
  pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
2207
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2208
+ pm_node_flags_t flags = target->base.flags;
2170
2209
 
2171
2210
  *node = (pm_index_target_node_t) {
2172
2211
  {
2173
2212
  .type = PM_INDEX_TARGET_NODE,
2174
- .flags = target->base.flags,
2213
+ .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
2175
2214
  .location = target->base.location
2176
2215
  },
2177
2216
  .receiver = target->receiver,
@@ -2701,6 +2740,45 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2701
2740
  return node;
2702
2741
  }
2703
2742
 
2743
+ /**
2744
+ * Check if the receiver of a `def` node is allowed.
2745
+ */
2746
+ static void
2747
+ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2748
+ switch (receiver->type) {
2749
+ case PM_BEGIN_NODE: {
2750
+ pm_begin_node_t *begin_node = (pm_begin_node_t *)receiver;
2751
+ pm_check_def_receiver(parser, (pm_node_t *) begin_node->statements);
2752
+ break;
2753
+ }
2754
+ case PM_PARENTHESES_NODE:
2755
+ pm_check_def_receiver(parser, ((pm_parentheses_node_t *) receiver)->body);
2756
+ break;
2757
+ case PM_STATEMENTS_NODE: {
2758
+ pm_statements_node_t *statements_node = (pm_statements_node_t *)receiver;
2759
+ pm_check_def_receiver(parser, statements_node->body.nodes[statements_node->body.size - 1]);
2760
+ break;
2761
+ }
2762
+ case PM_ARRAY_NODE:
2763
+ case PM_FLOAT_NODE:
2764
+ case PM_IMAGINARY_NODE:
2765
+ case PM_INTEGER_NODE:
2766
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
2767
+ case PM_INTERPOLATED_STRING_NODE:
2768
+ case PM_INTERPOLATED_SYMBOL_NODE:
2769
+ case PM_INTERPOLATED_X_STRING_NODE:
2770
+ case PM_RATIONAL_NODE:
2771
+ case PM_REGULAR_EXPRESSION_NODE:
2772
+ case PM_SOURCE_ENCODING_NODE:
2773
+ case PM_SOURCE_FILE_NODE:
2774
+ case PM_SOURCE_LINE_NODE:
2775
+ case PM_STRING_NODE:
2776
+ case PM_SYMBOL_NODE:
2777
+ case PM_X_STRING_NODE:
2778
+ pm_parser_err_node(parser, receiver, PM_ERR_SINGLETON_FOR_LITERALS);
2779
+ }
2780
+ }
2781
+
2704
2782
  /**
2705
2783
  * Allocate and initialize a new DefNode node.
2706
2784
  */
@@ -2712,7 +2790,6 @@ pm_def_node_create(
2712
2790
  pm_parameters_node_t *parameters,
2713
2791
  pm_node_t *body,
2714
2792
  pm_constant_id_list_t *locals,
2715
- uint32_t locals_body_index,
2716
2793
  const pm_token_t *def_keyword,
2717
2794
  const pm_token_t *operator,
2718
2795
  const pm_token_t *lparen,
@@ -2729,6 +2806,10 @@ pm_def_node_create(
2729
2806
  end = end_keyword->end;
2730
2807
  }
2731
2808
 
2809
+ if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2810
+ pm_check_def_receiver(parser, receiver);
2811
+ }
2812
+
2732
2813
  *node = (pm_def_node_t) {
2733
2814
  {
2734
2815
  .type = PM_DEF_NODE,
@@ -2740,7 +2821,6 @@ pm_def_node_create(
2740
2821
  .parameters = parameters,
2741
2822
  .body = body,
2742
2823
  .locals = *locals,
2743
- .locals_body_index = locals_body_index,
2744
2824
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2745
2825
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2746
2826
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3962,9 +4042,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3962
4042
  */
3963
4043
  static void
3964
4044
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
- // If the element being added is not an AssocNode or does not have a symbol key, then
3966
- // we want to turn the STATIC_KEYS flag off.
3967
- // TODO: Rename the flag to SYMBOL_KEYS instead.
4045
+ // If the element being added is not an AssocNode or does not have a symbol
4046
+ // key, then we want to turn the SYMBOL_KEYS flag off.
3968
4047
  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
4048
  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
4049
  }
@@ -4051,7 +4130,6 @@ static pm_lambda_node_t *
4051
4130
  pm_lambda_node_create(
4052
4131
  pm_parser_t *parser,
4053
4132
  pm_constant_id_list_t *locals,
4054
- uint32_t locals_body_index,
4055
4133
  const pm_token_t *operator,
4056
4134
  const pm_token_t *opening,
4057
4135
  const pm_token_t *closing,
@@ -4069,7 +4147,6 @@ pm_lambda_node_create(
4069
4147
  },
4070
4148
  },
4071
4149
  .locals = *locals,
4072
- .locals_body_index = locals_body_index,
4073
4150
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4074
4151
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4075
4152
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4161,12 +4238,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4161
4238
  }
4162
4239
 
4163
4240
  /**
4164
- * Allocate a new LocalVariableReadNode node.
4241
+ * Allocate a new LocalVariableReadNode node with constant_id.
4165
4242
  */
4166
4243
  static pm_local_variable_read_node_t *
4167
- pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
- pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
-
4244
+ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
4170
4245
  if (parser->current_param_name == name_id) {
4171
4246
  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
4247
  }
@@ -4185,6 +4260,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4185
4260
  return node;
4186
4261
  }
4187
4262
 
4263
+ /**
4264
+ * Allocate a new LocalVariableReadNode node.
4265
+ */
4266
+ static pm_local_variable_read_node_t *
4267
+ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4268
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4269
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
4270
+ }
4271
+
4188
4272
  /**
4189
4273
  * Allocate and initialize a new LocalVariableWriteNode node.
4190
4274
  */
@@ -4210,6 +4294,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
4210
4294
  return node;
4211
4295
  }
4212
4296
 
4297
+ /**
4298
+ * Returns true if the given bounds comprise `it`.
4299
+ */
4300
+ static inline bool
4301
+ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
4302
+ return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
4303
+ }
4304
+
4305
+ /**
4306
+ * Returns true if the given node is `it` default parameter.
4307
+ */
4308
+ static inline bool
4309
+ pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
4310
+ // Check if it's a local variable reference
4311
+ if (node->type != PM_CALL_NODE) {
4312
+ return false;
4313
+ }
4314
+
4315
+ // Check if it's a variable call
4316
+ pm_call_node_t *call_node = (pm_call_node_t *) node;
4317
+ if (!pm_call_node_variable_call_p(call_node)) {
4318
+ return false;
4319
+ }
4320
+
4321
+ // Check if it's called `it`
4322
+ pm_constant_id_t id = ((pm_call_node_t *)node)->name;
4323
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
4324
+ return pm_token_is_it(constant->start, constant->start + constant->length);
4325
+ }
4326
+
4327
+ /**
4328
+ * Convert a `it` variable call node to a node for `it` default parameter.
4329
+ */
4330
+ static pm_node_t *
4331
+ pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
4332
+ if (
4333
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
4334
+ !parser->current_scope->closed &&
4335
+ pm_node_is_it(parser, node)
4336
+ ) {
4337
+ if (parser->current_scope->explicit_params) {
4338
+ pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
4339
+ } else {
4340
+ pm_node_destroy(parser, node);
4341
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
4342
+ node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
4343
+ }
4344
+ }
4345
+ return node;
4346
+ }
4347
+
4213
4348
  /**
4214
4349
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
4215
4350
  * are of the form /^_\d$/).
@@ -5372,18 +5507,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
5372
5507
  return node;
5373
5508
  }
5374
5509
 
5510
+ /**
5511
+ * Read through the contents of a string and check if it consists solely of US ASCII code points.
5512
+ */
5513
+ static bool
5514
+ pm_ascii_only_p(const pm_string_t *contents) {
5515
+ const size_t length = pm_string_length(contents);
5516
+ const uint8_t *source = pm_string_source(contents);
5517
+
5518
+ for (size_t index = 0; index < length; index++) {
5519
+ if (source[index] & 0x80) return false;
5520
+ }
5521
+
5522
+ return true;
5523
+ }
5524
+
5525
+ /**
5526
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
5527
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
5528
+ * points. Otherwise, the encoding may be explicitly set with an escape
5529
+ * sequence.
5530
+ */
5531
+ static inline pm_node_flags_t
5532
+ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
5533
+ if (parser->explicit_encoding != NULL) {
5534
+ // A Symbol may optionally have its encoding explicitly set. This will
5535
+ // happen if an escape sequence results in a non-ASCII code point.
5536
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
5537
+ return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
5538
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
5539
+ return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
5540
+ }
5541
+ } else if (pm_ascii_only_p(contents)) {
5542
+ // Ruby stipulates that all source files must use an ASCII-compatible
5543
+ // encoding. Thus, all symbols appearing in source are eligible for
5544
+ // "downgrading" to US-ASCII.
5545
+ return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
5546
+ }
5547
+
5548
+ return 0;
5549
+ }
5550
+
5375
5551
  /**
5376
5552
  * Allocate and initialize a new SymbolNode node with the given unescaped
5377
5553
  * string.
5378
5554
  */
5379
5555
  static pm_symbol_node_t *
5380
- pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
5556
+ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
5381
5557
  pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5382
5558
 
5383
5559
  *node = (pm_symbol_node_t) {
5384
5560
  {
5385
5561
  .type = PM_SYMBOL_NODE,
5386
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5562
+ .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
5387
5563
  .location = {
5388
5564
  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
5389
5565
  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
@@ -5403,7 +5579,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5403
5579
  */
5404
5580
  static inline pm_symbol_node_t *
5405
5581
  pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5406
- return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
5582
+ return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
5407
5583
  }
5408
5584
 
5409
5585
  /**
@@ -5411,7 +5587,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
5411
5587
  */
5412
5588
  static pm_symbol_node_t *
5413
5589
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5414
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
5590
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
5415
5591
  parser->current_string = PM_STRING_EMPTY;
5416
5592
  return node;
5417
5593
  }
@@ -5433,6 +5609,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
5433
5609
 
5434
5610
  assert((label.end - label.start) >= 0);
5435
5611
  pm_string_shared_init(&node->unescaped, label.start, label.end);
5612
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
5613
+
5436
5614
  break;
5437
5615
  }
5438
5616
  case PM_TOKEN_MISSING: {
@@ -5495,6 +5673,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
5495
5673
  .unescaped = node->unescaped
5496
5674
  };
5497
5675
 
5676
+ pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
5677
+
5498
5678
  // We are explicitly _not_ using pm_node_destroy here because we don't want
5499
5679
  // to trash the unescaped string. We could instead copy the string if we
5500
5680
  // know that it is owned, but we're taking the fast path for now.
@@ -5885,6 +6065,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5885
6065
  .closed = closed,
5886
6066
  .explicit_params = false,
5887
6067
  .numbered_parameters = 0,
6068
+ .forwarding_params = 0,
5888
6069
  };
5889
6070
 
5890
6071
  pm_constant_id_list_init(&scope->locals);
@@ -5893,6 +6074,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5893
6074
  return true;
5894
6075
  }
5895
6076
 
6077
+ static void
6078
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
6079
+ {
6080
+ pm_scope_t *scope = parser->current_scope;
6081
+ while (scope) {
6082
+ if (scope->forwarding_params & mask) {
6083
+ if (!scope->closed) {
6084
+ pm_parser_err_token(parser, token, diag);
6085
+ return;
6086
+ }
6087
+ return;
6088
+ }
6089
+ if (scope->closed) break;
6090
+ scope = scope->previous;
6091
+ }
6092
+
6093
+ pm_parser_err_token(parser, token, diag);
6094
+ }
6095
+
6096
+ static inline void
6097
+ pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
6098
+ {
6099
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
6100
+ }
6101
+
6102
+ static void
6103
+ pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
6104
+ {
6105
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
6106
+ }
6107
+
6108
+ static inline void
6109
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
6110
+ {
6111
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
6112
+ }
6113
+
6114
+ static inline void
6115
+ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
6116
+ {
6117
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
6118
+ }
6119
+
6120
+ /**
6121
+ * Save the current param name as the return value and set it to the given
6122
+ * constant id.
6123
+ */
6124
+ static inline pm_constant_id_t
6125
+ pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
6126
+ pm_constant_id_t saved_param_name = parser->current_param_name;
6127
+ parser->current_param_name = current_param_name;
6128
+ return saved_param_name;
6129
+ }
6130
+
6131
+ /**
6132
+ * Save the current param name as the return value and clear it.
6133
+ */
6134
+ static inline pm_constant_id_t
6135
+ pm_parser_current_param_name_unset(pm_parser_t *parser) {
6136
+ return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
6137
+ }
6138
+
6139
+ /**
6140
+ * Restore the current param name from the given value.
6141
+ */
6142
+ static inline void
6143
+ pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
6144
+ parser->current_param_name = saved_param_name;
6145
+ }
6146
+
5896
6147
  /**
5897
6148
  * Check if any of the currently visible scopes contain a local variable
5898
6149
  * described by the given constant id.
@@ -5972,23 +6223,28 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
5972
6223
  /**
5973
6224
  * Add a parameter name to the current scope and check whether the name of the
5974
6225
  * parameter is unique or not.
6226
+ *
6227
+ * Returns `true` if this is a duplicate parameter name, otherwise returns
6228
+ * false.
5975
6229
  */
5976
- static void
6230
+ static bool
5977
6231
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5978
6232
  // We want to check whether the parameter name is a numbered parameter or
5979
6233
  // not.
5980
6234
  pm_refute_numbered_parameter(parser, name->start, name->end);
5981
6235
 
5982
- // We want to ignore any parameter name that starts with an underscore.
5983
- if ((name->start < name->end) && (*name->start == '_')) return;
5984
-
5985
6236
  // Otherwise we'll fetch the constant id for the parameter name and check
5986
6237
  // whether it's already in the current scope.
5987
6238
  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
5988
6239
 
5989
6240
  if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5990
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6241
+ // Add an error if the parameter doesn't start with _ and has been seen before
6242
+ if ((name->start < name->end) && (*name->start != '_')) {
6243
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6244
+ }
6245
+ return true;
5991
6246
  }
6247
+ return false;
5992
6248
  }
5993
6249
 
5994
6250
  /**
@@ -6029,7 +6285,7 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6029
6285
  } else if (*b < 0x80) {
6030
6286
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6031
6287
  } else {
6032
- return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
6288
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
6033
6289
  }
6034
6290
  }
6035
6291
 
@@ -6042,7 +6298,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
6298
  if (*b < 0x80) {
6043
6299
  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
6300
  } else {
6045
- return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6301
+ return pm_encoding_utf_8_char_width(b, end - b);
6046
6302
  }
6047
6303
  }
6048
6304
 
@@ -6317,8 +6573,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6317
6573
  */
6318
6574
  static void
6319
6575
  parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
  // The value is matched against "true" and then "false"; any other value
  // leaves parser->frozen_string_literal unchanged. The length guards keep each
  // comparison inside [start, end).
  // NOTE(review): presumably pm_strncasecmp is case-insensitive and compares
  // only the given number of bytes, so trailing bytes are not checked - confirm.
6320
- if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6576
+ if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6321
6577
  parser->frozen_string_literal = true;
6578
+ } else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
6579
+ parser->frozen_string_literal = false;
6322
6580
  }
6323
6581
  }
6324
6582
 
@@ -6541,21 +6799,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6541
6799
  return token->type == PM_TOKEN_BRACE_RIGHT;
6542
6800
  case PM_CONTEXT_PREDICATE:
6543
6801
  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6802
+ case PM_CONTEXT_NONE:
6803
+ return false;
6544
6804
  }
6545
6805
 
6546
6806
  return false;
6547
6807
  }
6548
6808
 
6549
- static bool
6550
- context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6809
+ /**
6810
+ * Returns the context that the given token is found to be terminating, or
6811
+ * returns PM_CONTEXT_NONE.
+ *
+ * The search starts at parser->current_context and follows the `prev` links;
+ * the first context in that chain for which context_terminator() accepts the
+ * token is the one returned. The parser is taken as const and is only read
+ * here.
+ * NOTE(review): presumably the chain runs from the innermost context outwards
+ * - confirm against context_push.
6812
+ */
6813
+ static pm_context_t
6814
+ context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
6551
6815
  pm_context_node_t *context_node = parser->current_context;
6552
6816
 
6553
6817
  while (context_node != NULL) {
6554
- if (context_terminator(context_node->context, token)) return true;
6818
+ if (context_terminator(context_node->context, token)) return context_node->context;
6555
6819
  context_node = context_node->prev;
6556
6820
  }
6557
6821
 
6558
- return false;
6822
+ return PM_CONTEXT_NONE;
6559
6823
  }
6560
6824
 
6561
6825
  static bool
@@ -6583,7 +6847,7 @@ context_pop(pm_parser_t *parser) {
6583
6847
  }
6584
6848
 
6585
6849
  static bool
6586
- context_p(pm_parser_t *parser, pm_context_t context) {
6850
+ context_p(const pm_parser_t *parser, pm_context_t context) {
6587
6851
  pm_context_node_t *context_node = parser->current_context;
6588
6852
 
6589
6853
  while (context_node != NULL) {
@@ -6595,7 +6859,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
6595
6859
  }
6596
6860
 
6597
6861
  static bool
6598
- context_def_p(pm_parser_t *parser) {
6862
+ context_def_p(const pm_parser_t *parser) {
6599
6863
  pm_context_node_t *context_node = parser->current_context;
6600
6864
 
6601
6865
  while (context_node != NULL) {
@@ -6618,6 +6882,55 @@ context_def_p(pm_parser_t *parser) {
6618
6882
  return false;
6619
6883
  }
6620
6884
 
6885
+ /**
6886
+ * Returns a human readable string for the given context, used in error
6887
+ * messages.
+ *
+ * Every returned pointer is a string literal, so callers must not free or
+ * modify it. Some contexts share one description, e.g. PM_CONTEXT_RESCUE and
+ * PM_CONTEXT_RESCUE_DEF both yield "'rescue' clause". PM_CONTEXT_NONE, and any
+ * value not covered by the switch, trips an assertion; when assertions are
+ * compiled out the empty string is returned instead.
6888
+ */
6889
+ static const char *
6890
+ context_human(pm_context_t context) {
6891
+ switch (context) {
6892
+ case PM_CONTEXT_NONE:
6893
+ assert(false && "unreachable");
6894
+ return "";
6895
+ case PM_CONTEXT_BEGIN: return "begin statement";
6896
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
6897
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
6898
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
6899
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
6900
+ case PM_CONTEXT_CLASS: return "class definition";
6901
+ case PM_CONTEXT_DEF: return "method definition";
6902
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
6903
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
6904
+ case PM_CONTEXT_ELSE: return "'else' clause";
6905
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
6906
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
6907
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
6908
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
6909
+ case PM_CONTEXT_FOR: return "for loop";
6910
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
6911
+ case PM_CONTEXT_IF: return "if statement";
6912
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
6913
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
6914
+ case PM_CONTEXT_MAIN: return "top level context";
6915
+ case PM_CONTEXT_MODULE: return "module definition";
6916
+ case PM_CONTEXT_PARENS: return "parentheses";
6917
+ case PM_CONTEXT_POSTEXE: return "'END' block";
6918
+ case PM_CONTEXT_PREDICATE: return "predicate";
6919
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
6920
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
6921
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
6922
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
6923
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
6924
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
6925
+ case PM_CONTEXT_UNLESS: return "unless statement";
6926
+ case PM_CONTEXT_UNTIL: return "until statement";
6927
+ case PM_CONTEXT_WHILE: return "while statement";
6928
+ }
6929
+
6930
+ assert(false && "unreachable");
6931
+ return "";
6932
+ }
6933
+
6621
6934
  /******************************************************************************/
6622
6935
  /* Specific token lexers */
6623
6936
  /******************************************************************************/
@@ -7982,7 +8295,6 @@ pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
7982
8295
  /**
7983
8296
  * When we're about to return from lexing the current token and we know for sure
7984
8297
  * that we have found an escape sequence, this function is called to copy the
7985
- *
7986
8298
  * contents of the token buffer into the current string on the parser so that it
7987
8299
  * can be attached to the correct node.
7988
8300
  */
@@ -7997,7 +8309,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
7997
8309
  * string. If we haven't pushed anything into the buffer, this means that we
7998
8310
  * never found an escape sequence, so we can directly reference the bounds of
7999
8311
  * the current string. Either way, at the return of this function it is expected
8000
- *
8001
8312
  * that parser->current_string is established in such a way that it can be
8002
8313
  * attached to a node.
8003
8314
  */
@@ -8016,7 +8327,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8016
8327
  * point into the buffer because we're about to provide a string that has
8017
8328
  * different content than a direct slice of the source.
8018
8329
  *
8019
- *
8020
8330
  * It is expected that the parser's current token end will be pointing at one
8021
8331
  * byte past the backslash that starts the escape sequence.
8022
8332
  */
@@ -8070,6 +8380,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
8070
8380
  return whitespace;
8071
8381
  }
8072
8382
 
8383
+ /**
8384
+ * Lex past the delimiter of a percent literal. Handle newlines and heredocs
8385
+ * appropriately.
+ *
+ * Returns the byte at parser->current.end (the delimiter) and advances
+ * current.end past it. When match_eol() reports a line ending there, the end
+ * is advanced by that whole length and the returned byte is the first byte of
+ * the line ending.
+ *
+ * This function does not itself check that current.end is before parser->end.
+ * NOTE(review): presumably every caller guards that - confirm.
+ * NOTE(review): if match_eol() can report a two-byte CRLF, the delimiter
+ * returned is the first of the two bytes - confirm that is what callers
+ * expect to use as the terminator.
8386
+ */
8387
+ static uint8_t
8388
+ pm_lex_percent_delimiter(pm_parser_t *parser) {
8389
+ size_t eol_length = match_eol(parser);
8390
+
8391
+ if (eol_length) {
8392
+ if (parser->heredoc_end) {
8393
+ // If we have already lexed a heredoc, then the newline has already
8394
+ // been added to the list. In this case we want to just flush the
8395
+ // heredoc end.
8396
+ parser_flush_heredoc_end(parser);
8397
+ } else {
8398
+ // Otherwise, we'll add the newline to the list of newlines.
8399
+ pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8400
+ }
8401
+
8402
+ const uint8_t delimiter = *parser->current.end;
8403
+ parser->current.end += eol_length;
8404
+
8405
+ return delimiter;
8406
+ }
8407
+
8408
+ return *parser->current.end++;
8409
+ }
8410
+
8073
8411
  /**
8074
8412
  * This is a convenience macro that will set the current token type, call the
8075
8413
  * lex callback, and then return from the parser_lex function.
@@ -8635,7 +8973,7 @@ parser_lex(pm_parser_t *parser) {
8635
8973
  // this is not a valid heredoc declaration. In this case we
8636
8974
  // will add an error, but we will still return a heredoc
8637
8975
  // start.
8638
- pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
8976
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
8639
8977
  body_start = parser->end;
8640
8978
  } else {
8641
8979
  // Otherwise, we want to indicate that the body of the
@@ -8826,12 +9164,10 @@ parser_lex(pm_parser_t *parser) {
8826
9164
  LEX(PM_TOKEN_PLUS_EQUAL);
8827
9165
  }
8828
9166
 
8829
- bool spcarg = lex_state_spcarg_p(parser, space_seen);
8830
- if (spcarg) {
8831
- pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
8832
- }
8833
-
8834
- if (lex_state_beg_p(parser) || spcarg) {
9167
+ if (
9168
+ lex_state_beg_p(parser) ||
9169
+ (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
9170
+ ) {
8835
9171
  lex_state_set(parser, PM_LEX_STATE_BEG);
8836
9172
 
8837
9173
  if (pm_char_is_decimal_digit(peek(parser))) {
@@ -8871,11 +9207,12 @@ parser_lex(pm_parser_t *parser) {
8871
9207
  }
8872
9208
 
8873
9209
  bool spcarg = lex_state_spcarg_p(parser, space_seen);
8874
- if (spcarg) {
9210
+ bool is_beg = lex_state_beg_p(parser);
9211
+ if (!is_beg && spcarg) {
8875
9212
  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
8876
9213
  }
8877
9214
 
8878
- if (lex_state_beg_p(parser) || spcarg) {
9215
+ if (is_beg || spcarg) {
8879
9216
  lex_state_set(parser, PM_LEX_STATE_BEG);
8880
9217
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
8881
9218
  }
@@ -9026,15 +9363,8 @@ parser_lex(pm_parser_t *parser) {
9026
9363
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9027
9364
  }
9028
9365
 
9029
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9030
-
9031
- size_t eol_length = match_eol(parser);
9032
- if (eol_length) {
9033
- parser->current.end += eol_length;
9034
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9035
- } else {
9036
- parser->current.end++;
9037
- }
9366
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9367
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9038
9368
 
9039
9369
  if (parser->current.end < parser->end) {
9040
9370
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -9054,7 +9384,7 @@ parser_lex(pm_parser_t *parser) {
9054
9384
  parser->current.end++;
9055
9385
 
9056
9386
  if (parser->current.end < parser->end) {
9057
- lex_mode_push_list(parser, false, *parser->current.end++);
9387
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9058
9388
  } else {
9059
9389
  lex_mode_push_list_eof(parser);
9060
9390
  }
@@ -9065,7 +9395,7 @@ parser_lex(pm_parser_t *parser) {
9065
9395
  parser->current.end++;
9066
9396
 
9067
9397
  if (parser->current.end < parser->end) {
9068
- lex_mode_push_list(parser, true, *parser->current.end++);
9398
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9069
9399
  } else {
9070
9400
  lex_mode_push_list_eof(parser);
9071
9401
  }
@@ -9076,9 +9406,8 @@ parser_lex(pm_parser_t *parser) {
9076
9406
  parser->current.end++;
9077
9407
 
9078
9408
  if (parser->current.end < parser->end) {
9079
- lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9080
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9081
- parser->current.end++;
9409
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9410
+ lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9082
9411
  } else {
9083
9412
  lex_mode_push_regexp(parser, '\0', '\0');
9084
9413
  }
@@ -9089,9 +9418,8 @@ parser_lex(pm_parser_t *parser) {
9089
9418
  parser->current.end++;
9090
9419
 
9091
9420
  if (parser->current.end < parser->end) {
9092
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9093
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9094
- parser->current.end++;
9421
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9422
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9095
9423
  } else {
9096
9424
  lex_mode_push_string_eof(parser);
9097
9425
  }
@@ -9102,9 +9430,8 @@ parser_lex(pm_parser_t *parser) {
9102
9430
  parser->current.end++;
9103
9431
 
9104
9432
  if (parser->current.end < parser->end) {
9105
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9106
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9107
- parser->current.end++;
9433
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9434
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9108
9435
  } else {
9109
9436
  lex_mode_push_string_eof(parser);
9110
9437
  }
@@ -9115,9 +9442,9 @@ parser_lex(pm_parser_t *parser) {
9115
9442
  parser->current.end++;
9116
9443
 
9117
9444
  if (parser->current.end < parser->end) {
9118
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9445
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9446
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9119
9447
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9120
- parser->current.end++;
9121
9448
  } else {
9122
9449
  lex_mode_push_string_eof(parser);
9123
9450
  }
@@ -9128,7 +9455,7 @@ parser_lex(pm_parser_t *parser) {
9128
9455
  parser->current.end++;
9129
9456
 
9130
9457
  if (parser->current.end < parser->end) {
9131
- lex_mode_push_list(parser, false, *parser->current.end++);
9458
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9132
9459
  } else {
9133
9460
  lex_mode_push_list_eof(parser);
9134
9461
  }
@@ -9139,7 +9466,7 @@ parser_lex(pm_parser_t *parser) {
9139
9466
  parser->current.end++;
9140
9467
 
9141
9468
  if (parser->current.end < parser->end) {
9142
- lex_mode_push_list(parser, true, *parser->current.end++);
9469
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9143
9470
  } else {
9144
9471
  lex_mode_push_list_eof(parser);
9145
9472
  }
@@ -9150,8 +9477,8 @@ parser_lex(pm_parser_t *parser) {
9150
9477
  parser->current.end++;
9151
9478
 
9152
9479
  if (parser->current.end < parser->end) {
9153
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9154
- parser->current.end++;
9480
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9481
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9155
9482
  } else {
9156
9483
  lex_mode_push_string_eof(parser);
9157
9484
  }
@@ -9888,15 +10215,22 @@ parser_lex(pm_parser_t *parser) {
9888
10215
  parser->next_start = NULL;
9889
10216
  }
9890
10217
 
9891
- // We'll check if we're at the end of the file. If we are, then we need to
9892
- // return the EOF token.
10218
+ // Now let's grab the information about the identifier off of the
10219
+ // current lex mode.
10220
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10221
+
10222
+ // We'll check if we're at the end of the file. If we are, then we
10223
+ // will add an error (because we weren't able to find the
10224
+ // terminator) but still continue parsing so that content after the
10225
+ // declaration of the heredoc can be parsed.
9893
10226
  if (parser->current.end >= parser->end) {
9894
- LEX(PM_TOKEN_EOF);
10227
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10228
+ parser->next_start = lex_mode->as.heredoc.next_start;
10229
+ parser->heredoc_end = parser->current.end;
10230
+ lex_state_set(parser, PM_LEX_STATE_END);
10231
+ LEX(PM_TOKEN_HEREDOC_END);
9895
10232
  }
9896
10233
 
9897
- // Now let's grab the information about the identifier off of the current
9898
- // lex mode.
9899
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9900
10234
  const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
9901
10235
  size_t ident_length = lex_mode->as.heredoc.ident_length;
9902
10236
 
@@ -10184,8 +10518,8 @@ parser_lex(pm_parser_t *parser) {
10184
10518
  typedef enum {
10185
10519
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
10520
  PM_BINDING_POWER_STATEMENT = 2,
10187
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10521
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
10522
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
10189
10523
  PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
10524
  PM_BINDING_POWER_NOT = 10, // not
10191
10525
  PM_BINDING_POWER_MATCH = 12, // => in
@@ -10239,15 +10573,15 @@ typedef struct {
10239
10573
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10240
10574
 
10241
10575
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10576
+ // rescue
10577
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10578
+
10242
10579
  // if unless until while
10243
10580
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10244
10581
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10245
10582
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10246
10583
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10247
10584
 
10248
- // rescue
10249
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10250
-
10251
10585
  // and or
10252
10586
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10253
10587
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -10866,7 +11200,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10866
11200
  return target;
10867
11201
  }
10868
11202
 
10869
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
11203
+ if (char_is_identifier_start(parser, call->message_loc.start)) {
10870
11204
  // When we get here, we have a method call, because it was
10871
11205
  // previously marked as a method call but now we have an =. This
10872
11206
  // looks like:
@@ -10984,6 +11318,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10984
11318
  static pm_node_t *
10985
11319
  parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10986
11320
  pm_node_t *result = parse_targets(parser, first_target, binding_power);
11321
+ accept1(parser, PM_TOKEN_NEWLINE);
10987
11322
 
10988
11323
  // Ensure that we have either an = or a ) after the targets.
10989
11324
  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -11084,8 +11419,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11084
11419
 
11085
11420
  if (token_begins_expression_p(parser->current.type)) {
11086
11421
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11087
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11088
- pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11422
+ }
11423
+ else {
11424
+ pm_parser_scope_forwarding_keywords_check(parser, &operator);
11089
11425
  }
11090
11426
 
11091
11427
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -11234,13 +11570,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11234
11570
  if (token_begins_expression_p(parser->current.type)) {
11235
11571
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
11572
  } else {
11237
- if (pm_parser_local_depth(parser, &operator) == -1) {
11238
- // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
- pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
- if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
- }
11243
- }
11573
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11574
+ pm_parser_scope_forwarding_block_check(parser, &operator);
11244
11575
  }
11245
11576
 
11246
11577
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11258,10 +11589,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11589
  pm_token_t operator = parser->previous;
11259
11590
 
11260
11591
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11261
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11262
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11263
- }
11264
-
11592
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
11265
11593
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11266
11594
  } else {
11267
11595
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
@@ -11287,15 +11615,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11287
11615
  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11288
11616
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11289
11617
  } else {
11290
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11291
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11292
- }
11618
+ pm_parser_scope_forwarding_all_check(parser, &parser->previous);
11293
11619
  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11294
11620
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11295
11621
  }
11296
11622
 
11297
11623
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
11298
11624
  parse_arguments_append(parser, arguments, argument);
11625
+ arguments->has_forwarding = true;
11299
11626
  parsed_forwarding_arguments = true;
11300
11627
  break;
11301
11628
  }
@@ -11338,6 +11665,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11338
11665
  }
11339
11666
 
11340
11667
  parsed_bare_hash = true;
11668
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
11669
+ // TODO: Could we solve this with binding powers instead?
11670
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
11341
11671
  }
11342
11672
 
11343
11673
  parse_arguments_append(parser, arguments, argument);
@@ -11414,7 +11744,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11414
11744
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11415
11745
  pm_token_t name = parser->previous;
11416
11746
  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11417
- pm_parser_parameter_name_check(parser, &name);
11747
+ if (pm_parser_parameter_name_check(parser, &name)) {
11748
+ pm_node_flag_set_repeated_parameter(value);
11749
+ }
11418
11750
  pm_parser_local_add_token(parser, &name);
11419
11751
  }
11420
11752
 
@@ -11424,7 +11756,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11424
11756
  pm_token_t name = parser->previous;
11425
11757
 
11426
11758
  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11427
- pm_parser_parameter_name_check(parser, &name);
11759
+ if (pm_parser_parameter_name_check(parser, &name)) {
11760
+ pm_node_flag_set_repeated_parameter(param);
11761
+ }
11428
11762
  pm_parser_local_add_token(parser, &name);
11429
11763
  }
11430
11764
 
@@ -11541,19 +11875,20 @@ parse_parameters(
11541
11875
  pm_token_t operator = parser->previous;
11542
11876
  pm_token_t name;
11543
11877
 
11878
+ bool repeated = false;
11544
11879
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11545
11880
  name = parser->previous;
11546
- pm_parser_parameter_name_check(parser, &name);
11881
+ repeated = pm_parser_parameter_name_check(parser, &name);
11547
11882
  pm_parser_local_add_token(parser, &name);
11548
11883
  } else {
11549
11884
  name = not_provided(parser);
11550
-
11551
- if (allows_forwarding_parameters) {
11552
- pm_parser_local_add_token(parser, &operator);
11553
- }
11885
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11554
11886
  }
11555
11887
 
11556
11888
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
11889
+ if (repeated) {
11890
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11891
+ }
11557
11892
  if (params->block == NULL) {
11558
11893
  pm_parameters_node_block_set(params, param);
11559
11894
  } else {
@@ -11572,9 +11907,8 @@ parse_parameters(
11572
11907
  update_parameter_state(parser, &parser->current, &order);
11573
11908
  parser_lex(parser);
11574
11909
 
11575
- if (allows_forwarding_parameters) {
11576
- pm_parser_local_add_token(parser, &parser->previous);
11577
- }
11910
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11911
+ parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
11578
11912
 
11579
11913
  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
11580
11914
  if (params->keyword_rest != NULL) {
@@ -11626,20 +11960,23 @@ parse_parameters(
11626
11960
  }
11627
11961
 
11628
11962
  pm_token_t name = parser->previous;
11629
- pm_parser_parameter_name_check(parser, &name);
11963
+ bool repeated = pm_parser_parameter_name_check(parser, &name);
11630
11964
  pm_parser_local_add_token(parser, &name);
11631
11965
 
11632
11966
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11633
11967
  pm_token_t operator = parser->previous;
11634
11968
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11635
- pm_constant_id_t old_param_name = parser->current_param_name;
11636
- parser->current_param_name = pm_parser_constant_id_token(parser, &name);
11969
+
11970
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
11637
11971
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11638
11972
 
11639
11973
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11974
+ if (repeated) {
11975
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11976
+ }
11640
11977
  pm_parameters_node_optionals_append(params, param);
11641
11978
 
11642
- parser->current_param_name = old_param_name;
11979
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11643
11980
  context_pop(parser);
11644
11981
 
11645
11982
  // If parsing the value of the parameter resulted in error recovery,
@@ -11651,9 +11988,15 @@ parse_parameters(
11651
11988
  }
11652
11989
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11653
11990
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11991
+ if (repeated) {
11992
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11993
+ }
11654
11994
  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
11655
11995
  } else {
11656
11996
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11997
+ if (repeated) {
11998
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11999
+ }
11657
12000
  pm_parameters_node_posts_append(params, (pm_node_t *) param);
11658
12001
  }
11659
12002
 
@@ -11668,7 +12011,7 @@ parse_parameters(
11668
12011
  pm_token_t local = name;
11669
12012
  local.end -= 1;
11670
12013
 
11671
- pm_parser_parameter_name_check(parser, &local);
12014
+ bool repeated = pm_parser_parameter_name_check(parser, &local);
11672
12015
  pm_parser_local_add_token(parser, &local);
11673
12016
 
11674
12017
  switch (parser->current.type) {
@@ -11676,6 +12019,9 @@ parse_parameters(
11676
12019
  case PM_TOKEN_PARENTHESIS_RIGHT:
11677
12020
  case PM_TOKEN_PIPE: {
11678
12021
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12022
+ if (repeated) {
12023
+ pm_node_flag_set_repeated_parameter(param);
12024
+ }
11679
12025
  pm_parameters_node_keywords_append(params, param);
11680
12026
  break;
11681
12027
  }
@@ -11687,6 +12033,9 @@ parse_parameters(
11687
12033
  }
11688
12034
 
11689
12035
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12036
+ if (repeated) {
12037
+ pm_node_flag_set_repeated_parameter(param);
12038
+ }
11690
12039
  pm_parameters_node_keywords_append(params, param);
11691
12040
  break;
11692
12041
  }
@@ -11695,17 +12044,22 @@ parse_parameters(
11695
12044
 
11696
12045
  if (token_begins_expression_p(parser->current.type)) {
11697
12046
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11698
- pm_constant_id_t old_param_name = parser->current_param_name;
11699
- parser->current_param_name = pm_parser_constant_id_token(parser, &local);
12047
+
12048
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
11700
12049
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
- parser->current_param_name = old_param_name;
12050
+
12051
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11702
12052
  context_pop(parser);
12053
+
11703
12054
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11704
12055
  }
11705
12056
  else {
11706
12057
  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11707
12058
  }
11708
12059
 
12060
+ if (repeated) {
12061
+ pm_node_flag_set_repeated_parameter(param);
12062
+ }
11709
12063
  pm_parameters_node_keywords_append(params, param);
11710
12064
 
11711
12065
  // If parsing the value of the parameter resulted in error recovery,
@@ -11728,20 +12082,21 @@ parse_parameters(
11728
12082
 
11729
12083
  pm_token_t operator = parser->previous;
11730
12084
  pm_token_t name;
11731
-
12085
+ bool repeated = false;
11732
12086
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11733
12087
  name = parser->previous;
11734
- pm_parser_parameter_name_check(parser, &name);
12088
+ repeated = pm_parser_parameter_name_check(parser, &name);
11735
12089
  pm_parser_local_add_token(parser, &name);
11736
12090
  } else {
11737
12091
  name = not_provided(parser);
11738
12092
 
11739
- if (allows_forwarding_parameters) {
11740
- pm_parser_local_add_token(parser, &operator);
11741
- }
12093
+ parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
11742
12094
  }
11743
12095
 
11744
12096
  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
12097
+ if (repeated) {
12098
+ pm_node_flag_set_repeated_parameter(param);
12099
+ }
11745
12100
  if (params->rest == NULL) {
11746
12101
  pm_parameters_node_rest_set(params, param);
11747
12102
  } else {
@@ -11764,19 +12119,21 @@ parse_parameters(
11764
12119
  } else {
11765
12120
  pm_token_t name;
11766
12121
 
12122
+ bool repeated = false;
11767
12123
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11768
12124
  name = parser->previous;
11769
- pm_parser_parameter_name_check(parser, &name);
12125
+ repeated = pm_parser_parameter_name_check(parser, &name);
11770
12126
  pm_parser_local_add_token(parser, &name);
11771
12127
  } else {
11772
12128
  name = not_provided(parser);
11773
12129
 
11774
- if (allows_forwarding_parameters) {
11775
- pm_parser_local_add_token(parser, &operator);
11776
- }
12130
+ parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
11777
12131
  }
11778
12132
 
11779
12133
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
12134
+ if (repeated) {
12135
+ pm_node_flag_set_repeated_parameter(param);
12136
+ }
11780
12137
  }
11781
12138
 
11782
12139
  if (params->keyword_rest == NULL) {
@@ -12012,10 +12369,13 @@ parse_block_parameters(
12012
12369
  if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
12013
12370
  do {
12014
12371
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
12015
- pm_parser_parameter_name_check(parser, &parser->previous);
12372
+ bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
12016
12373
  pm_parser_local_add_token(parser, &parser->previous);
12017
12374
 
12018
12375
  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
12376
+ if (repeated) {
12377
+ pm_node_flag_set_repeated_parameter((pm_node_t *)local);
12378
+ }
12019
12379
  pm_block_parameters_node_append_local(block_parameters, local);
12020
12380
  } while (accept1(parser, PM_TOKEN_COMMA));
12021
12381
  }
@@ -12031,8 +12391,10 @@ parse_block(pm_parser_t *parser) {
12031
12391
  pm_token_t opening = parser->previous;
12032
12392
  accept1(parser, PM_TOKEN_NEWLINE);
12033
12393
 
12394
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
12034
12395
  pm_accepts_block_stack_push(parser, true);
12035
12396
  pm_parser_scope_push(parser, false);
12397
+
12036
12398
  pm_block_parameters_node_t *block_parameters = NULL;
12037
12399
 
12038
12400
  if (accept1(parser, PM_TOKEN_PIPE)) {
@@ -12053,12 +12415,6 @@ parse_block(pm_parser_t *parser) {
12053
12415
  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
12416
  }
12055
12417
 
12056
- uint32_t locals_body_index = 0;
12057
-
12058
- if (block_parameters) {
12059
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
12060
- }
12061
-
12062
12418
  accept1(parser, PM_TOKEN_NEWLINE);
12063
12419
  pm_node_t *statements = NULL;
12064
12420
 
@@ -12090,13 +12446,14 @@ parse_block(pm_parser_t *parser) {
12090
12446
 
12091
12447
  if (parameters == NULL && (maximum > 0)) {
12092
12448
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
- locals_body_index = maximum;
12094
12449
  }
12095
12450
 
12096
12451
  pm_constant_id_list_t locals = parser->current_scope->locals;
12097
12452
  pm_parser_scope_pop(parser);
12098
12453
  pm_accepts_block_stack_pop(parser);
12099
- return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
12454
+ pm_parser_current_param_name_restore(parser, saved_param_name);
12455
+
12456
+ return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12100
12457
  }
12101
12458
 
12102
12459
  /**
@@ -12157,14 +12514,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12157
12514
  }
12158
12515
 
12159
12516
  if (block != NULL) {
12160
- if (arguments->block == NULL) {
12517
+ if (arguments->block == NULL && !arguments->has_forwarding) {
12161
12518
  arguments->block = (pm_node_t *) block;
12162
12519
  } else {
12163
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12164
- if (arguments->arguments == NULL) {
12165
- arguments->arguments = pm_arguments_node_create(parser);
12520
+ if (arguments->has_forwarding) {
12521
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
12522
+ } else {
12523
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12524
+ }
12525
+ if (arguments->block != NULL) {
12526
+ if (arguments->arguments == NULL) {
12527
+ arguments->arguments = pm_arguments_node_create(parser);
12528
+ }
12529
+ pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12166
12530
  }
12167
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12168
12531
  arguments->block = (pm_node_t *) block;
12169
12532
  }
12170
12533
  }
@@ -12384,8 +12747,14 @@ static inline pm_node_flags_t
12384
12747
  parse_unescaped_encoding(const pm_parser_t *parser) {
12385
12748
  if (parser->explicit_encoding != NULL) {
12386
12749
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12750
+ // If there's an explicit encoding and it's using a UTF-8 escape
12751
+ // sequence, then mark the string as UTF-8.
12387
12752
  return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
12753
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12754
+ // If there's a non-UTF-8 escape sequence being used, then the
12755
+ // string uses the source encoding, unless the source is marked as
12756
+ // US-ASCII. In that case the string is forced as ASCII-8BIT in
12757
+ // order to keep the string valid.
12389
12758
  return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
12759
  }
12391
12760
  }
@@ -12509,14 +12878,54 @@ parse_string_part(pm_parser_t *parser) {
12509
12878
  }
12510
12879
  }
12511
12880
 
12881
+ /**
12882
+ * When creating a symbol, unary operators that cannot be binary operators
12883
+ * automatically drop trailing `@` characters. This happens at the parser level,
12884
+ * such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
12885
+ */
12886
+ static const uint8_t *
12887
+ parse_operator_symbol_name(const pm_token_t *name) {
12888
+ switch (name->type) {
12889
+ case PM_TOKEN_TILDE:
12890
+ case PM_TOKEN_BANG:
12891
+ if (name->end[-1] == '@') return name->end - 1;
12892
+ /* fallthrough */
12893
+ default:
12894
+ return name->end;
12895
+ }
12896
+ }
12897
+
12898
+ static pm_node_t *
12899
+ parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
12900
+ pm_token_t closing = not_provided(parser);
12901
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
12902
+
12903
+ const uint8_t *end = parse_operator_symbol_name(&parser->current);
12904
+
12905
+ if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12906
+ parser_lex(parser);
12907
+
12908
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
12909
+ pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
12910
+
12911
+ return (pm_node_t *) symbol;
12912
+ }
12913
+
12914
+ /**
12915
+ * Parse a symbol node. This function will get called immediately after finding
12916
+ * a symbol opening token. This handles parsing bare symbols and interpolated
12917
+ * symbols.
12918
+ */
12512
12919
  static pm_node_t *
12513
12920
  parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
12514
- pm_token_t opening = parser->previous;
12921
+ const pm_token_t opening = parser->previous;
12515
12922
 
12516
12923
  if (lex_mode->mode != PM_LEX_STRING) {
12517
12924
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12518
12925
 
12519
12926
  switch (parser->current.type) {
12927
+ case PM_CASE_OPERATOR:
12928
+ return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12520
12929
  case PM_TOKEN_IDENTIFIER:
12521
12930
  case PM_TOKEN_CONSTANT:
12522
12931
  case PM_TOKEN_INSTANCE_VARIABLE:
@@ -12528,10 +12937,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12528
12937
  case PM_CASE_KEYWORD:
12529
12938
  parser_lex(parser);
12530
12939
  break;
12531
- case PM_CASE_OPERATOR:
12532
- lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12533
- parser_lex(parser);
12534
- break;
12535
12940
  default:
12536
12941
  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
12537
12942
  break;
@@ -12541,6 +12946,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12541
12946
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12542
12947
 
12543
12948
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
12949
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
12950
+
12544
12951
  return (pm_node_t *) symbol;
12545
12952
  }
12546
12953
 
@@ -12637,7 +13044,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12637
13044
  } else {
12638
13045
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
13046
  }
12640
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13047
+
13048
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
12641
13049
  }
12642
13050
 
12643
13051
  /**
@@ -12647,8 +13055,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12647
13055
  static inline pm_node_t *
12648
13056
  parse_undef_argument(pm_parser_t *parser) {
12649
13057
  switch (parser->current.type) {
13058
+ case PM_CASE_OPERATOR: {
13059
+ const pm_token_t opening = not_provided(parser);
13060
+ return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
13061
+ }
12650
13062
  case PM_CASE_KEYWORD:
12651
- case PM_CASE_OPERATOR:
12652
13063
  case PM_TOKEN_CONSTANT:
12653
13064
  case PM_TOKEN_IDENTIFIER:
12654
13065
  case PM_TOKEN_METHOD_NAME: {
@@ -12659,6 +13070,8 @@ parse_undef_argument(pm_parser_t *parser) {
12659
13070
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12660
13071
 
12661
13072
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13073
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13074
+
12662
13075
  return (pm_node_t *) symbol;
12663
13076
  }
12664
13077
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12682,21 +13095,24 @@ parse_undef_argument(pm_parser_t *parser) {
12682
13095
  static inline pm_node_t *
12683
13096
  parse_alias_argument(pm_parser_t *parser, bool first) {
12684
13097
  switch (parser->current.type) {
12685
- case PM_CASE_OPERATOR:
13098
+ case PM_CASE_OPERATOR: {
13099
+ const pm_token_t opening = not_provided(parser);
13100
+ return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
13101
+ }
12686
13102
  case PM_CASE_KEYWORD:
12687
13103
  case PM_TOKEN_CONSTANT:
12688
13104
  case PM_TOKEN_IDENTIFIER:
12689
13105
  case PM_TOKEN_METHOD_NAME: {
12690
- if (first) {
12691
- lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12692
- }
12693
-
13106
+ if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12694
13107
  parser_lex(parser);
13108
+
12695
13109
  pm_token_t opening = not_provided(parser);
12696
13110
  pm_token_t closing = not_provided(parser);
12697
13111
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12698
13112
 
12699
13113
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13114
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13115
+
12700
13116
  return (pm_node_t *) symbol;
12701
13117
  }
12702
13118
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12733,6 +13149,65 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12733
13149
  return false;
12734
13150
  }
12735
13151
 
13152
+ /**
13153
+ * Parse an identifier into a local variable read. If the local variable
13154
+ * is not found, it returns NULL instead.
13155
+ */
13156
+ static pm_local_variable_read_node_t *
13157
+ parse_variable(pm_parser_t *parser) {
13158
+ int depth;
13159
+ if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
13160
+ return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
13161
+ }
13162
+
13163
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
13164
+ // Now that we know we have a numbered parameter, we need to check
13165
+ // if it's allowed in this context. If it is, then we will create a
13166
+ // local variable read. If it's not, then we'll create a normal call
13167
+ // node but add an error.
13168
+ if (parser->current_scope->explicit_params) {
13169
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
13170
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
13171
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13172
+ } else {
13173
+ // Indicate that this scope is using numbered params so that child
13174
+ // scopes cannot.
13175
+ uint8_t number = parser->previous.start[1];
13176
+
13177
+ // We subtract the value for the character '0' to get the actual
13178
+ // integer value of the number (only _1 through _9 are valid)
13179
+ uint8_t numbered_parameters = (uint8_t) (number - '0');
13180
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
13181
+ parser->current_scope->numbered_parameters = numbered_parameters;
13182
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
13183
+ }
13184
+
13185
+ // When you use a numbered parameter, it implies the existence
13186
+ // of all of the locals that exist before it. For example,
13187
+ // referencing _2 means that _1 must exist. Therefore here we
13188
+ // loop through all of the possibilities and add them into the
13189
+ // constant pool.
13190
+ uint8_t current = '1';
13191
+ uint8_t *value;
13192
+
13193
+ while (current < number) {
13194
+ value = malloc(2);
13195
+ value[0] = '_';
13196
+ value[1] = current++;
13197
+ pm_parser_local_add_owned(parser, value, 2);
13198
+ }
13199
+
13200
+ // Now we can add the actual token that is being used. For
13201
+ // this one we can add a shared version since it is directly
13202
+ // referenced in the source.
13203
+ pm_parser_local_add_token(parser, &parser->previous);
13204
+ return pm_local_variable_read_node_create(parser, &parser->previous, 0);
13205
+ }
13206
+ }
13207
+
13208
+ return NULL;
13209
+ }
13210
+
12736
13211
  /**
12737
13212
  * Parse an identifier into either a local variable read or a call.
12738
13213
  */
@@ -12741,56 +13216,8 @@ parse_variable_call(pm_parser_t *parser) {
12741
13216
  pm_node_flags_t flags = 0;
12742
13217
 
12743
13218
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
12744
- int depth;
12745
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
12746
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12747
- }
12748
-
12749
- if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12750
- // Now that we know we have a numbered parameter, we need to check
12751
- // if it's allowed in this context. If it is, then we will create a
12752
- // local variable read. If it's not, then we'll create a normal call
12753
- // node but add an error.
12754
- if (parser->current_scope->explicit_params) {
12755
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12756
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
12757
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12758
- } else {
12759
- // Indicate that this scope is using numbered params so that child
12760
- // scopes cannot.
12761
- uint8_t number = parser->previous.start[1];
12762
-
12763
- // We subtract the value for the character '0' to get the actual
12764
- // integer value of the number (only _1 through _9 are valid)
12765
- uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
- if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
- parser->current_scope->numbered_parameters = numbered_parameters;
12768
- pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
- }
12770
-
12771
- // When you use a numbered parameter, it implies the existence
12772
- // of all of the locals that exist before it. For example,
12773
- // referencing _2 means that _1 must exist. Therefore here we
12774
- // loop through all of the possibilities and add them into the
12775
- // constant pool.
12776
- uint8_t current = '1';
12777
- uint8_t *value;
12778
-
12779
- while (current < number) {
12780
- value = malloc(2);
12781
- value[0] = '_';
12782
- value[1] = current++;
12783
- pm_parser_local_add_owned(parser, value, 2);
12784
- }
12785
-
12786
- // Now we can add the actual token that is being used. For
12787
- // this one we can add a shared version since it is directly
12788
- // referenced in the source.
12789
- pm_parser_local_add_token(parser, &parser->previous);
12790
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
12791
- }
12792
- }
12793
-
13219
+ pm_local_variable_read_node_t *node = parse_variable(parser);
13220
+ if (node != NULL) return (pm_node_t *) node;
12794
13221
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
12795
13222
  }
12796
13223
 
@@ -13076,43 +13503,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
13076
13503
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
13077
13504
  }
13078
13505
 
13506
+ /**
13507
+ * Create an implicit node for the value of a hash pattern that has omitted the
13508
+ * value. This will use an implicit local variable target.
13509
+ */
13510
+ static pm_node_t *
13511
+ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
13512
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13513
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
13514
+
13515
+ int current_depth = pm_parser_local_depth_constant_id(parser, name);
13516
+ uint32_t depth;
13517
+
13518
+ if (current_depth == -1) {
13519
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13520
+ depth = 0;
13521
+ } else {
13522
+ depth = (uint32_t) current_depth;
13523
+ }
13524
+
13525
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
13526
+ return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
13527
+ }
13528
+
13079
13529
  /**
13080
13530
  * Parse a hash pattern.
13081
13531
  */
13082
13532
  static pm_hash_pattern_node_t *
13083
- parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13533
+ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
13084
13534
  pm_node_list_t assocs = { 0 };
13085
13535
  pm_node_t *rest = NULL;
13086
13536
 
13087
- switch (PM_NODE_TYPE(first_assoc)) {
13088
- case PM_ASSOC_NODE: {
13089
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13090
- // Here we have a value for the first assoc in the list, so we will
13091
- // parse it now and update the first assoc.
13092
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13537
+ switch (PM_NODE_TYPE(first_node)) {
13538
+ case PM_ASSOC_SPLAT_NODE:
13539
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
13540
+ rest = first_node;
13541
+ break;
13542
+ case PM_SYMBOL_NODE: {
13543
+ if (pm_symbol_node_label_p(first_node)) {
13544
+ pm_node_t *value;
13545
+
13546
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13547
+ // Here we have a value for the first assoc in the list, so
13548
+ // we will parse it now.
13549
+ value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13550
+ } else {
13551
+ // Otherwise, we will create an implicit local variable
13552
+ // target for the value.
13553
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
13554
+ }
13093
13555
 
13094
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
13095
- assoc->base.location.end = value->location.end;
13096
- assoc->value = value;
13097
- } else {
13098
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
13556
+ pm_token_t operator = not_provided(parser);
13557
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13099
13558
 
13100
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
13101
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13102
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13103
- }
13559
+ pm_node_list_append(&assocs, assoc);
13560
+ break;
13104
13561
  }
13562
+ }
13563
+ /* fallthrough */
13564
+ default: {
13565
+ // If we get anything else, then this is an error. For this we'll
13566
+ // create a missing node for the value and create an assoc node for
13567
+ // the first node in the list.
13568
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
13105
13569
 
13106
- pm_node_list_append(&assocs, first_assoc);
13570
+ pm_token_t operator = not_provided(parser);
13571
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
13572
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13573
+
13574
+ pm_node_list_append(&assocs, assoc);
13107
13575
  break;
13108
13576
  }
13109
- case PM_ASSOC_SPLAT_NODE:
13110
- case PM_NO_KEYWORDS_PARAMETER_NODE:
13111
- rest = first_assoc;
13112
- break;
13113
- default:
13114
- assert(false);
13115
- break;
13116
13577
  }
13117
13578
 
13118
13579
  // If there are any other assocs, then we'll parse them now.
@@ -13141,6 +13602,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13141
13602
  } else {
13142
13603
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13143
13604
  pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13605
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
13144
13606
  }
13145
13607
 
13146
13608
  pm_token_t operator = not_provided(parser);
@@ -13246,45 +13708,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13246
13708
  // pattern node.
13247
13709
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
13248
13710
  } else {
13249
- pm_node_t *first_assoc;
13711
+ pm_node_t *first_node;
13250
13712
 
13251
13713
  switch (parser->current.type) {
13252
- case PM_TOKEN_LABEL: {
13714
+ case PM_TOKEN_LABEL:
13253
13715
  parser_lex(parser);
13254
-
13255
- pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
13256
- pm_token_t operator = not_provided(parser);
13257
-
13258
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13716
+ first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13259
13717
  break;
13260
- }
13261
13718
  case PM_TOKEN_USTAR_STAR:
13262
- first_assoc = parse_pattern_keyword_rest(parser);
13719
+ first_node = parse_pattern_keyword_rest(parser);
13263
13720
  break;
13264
- case PM_TOKEN_STRING_BEGIN: {
13265
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13266
- pm_token_t operator = not_provided(parser);
13267
-
13268
- if (!pm_symbol_node_label_p(key)) {
13269
- pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
13270
- }
13271
-
13272
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
13721
+ case PM_TOKEN_STRING_BEGIN:
13722
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13273
13723
  break;
13274
- }
13275
13724
  default: {
13276
13725
  parser_lex(parser);
13277
13726
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
13278
13727
 
13279
- pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13280
- pm_token_t operator = not_provided(parser);
13281
-
13282
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13728
+ first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13283
13729
  break;
13284
13730
  }
13285
13731
  }
13286
13732
 
13287
- node = parse_pattern_hash(parser, first_assoc);
13733
+ node = parse_pattern_hash(parser, first_node);
13288
13734
 
13289
13735
  accept1(parser, PM_TOKEN_NEWLINE);
13290
13736
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -13350,7 +13796,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13350
13796
  switch (parser->current.type) {
13351
13797
  case PM_TOKEN_IDENTIFIER: {
13352
13798
  parser_lex(parser);
13353
- pm_node_t *variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13799
+ pm_node_t *variable = (pm_node_t *) parse_variable(parser);
13800
+ if (variable == NULL) {
13801
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
13802
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13803
+ variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13804
+ } else {
13805
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13806
+ variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13807
+ }
13808
+ }
13354
13809
 
13355
13810
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13356
13811
  }
@@ -13519,9 +13974,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13519
13974
  case PM_TOKEN_LABEL: {
13520
13975
  parser_lex(parser);
13521
13976
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13522
- pm_token_t operator = not_provided(parser);
13523
-
13524
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
13977
+ return (pm_node_t *) parse_pattern_hash(parser, key);
13525
13978
  }
13526
13979
  case PM_TOKEN_USTAR_STAR: {
13527
13980
  node = parse_pattern_keyword_rest(parser);
@@ -13544,8 +13997,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13544
13997
  // If we got a dynamic label symbol, then we need to treat it like the
13545
13998
  // beginning of a hash pattern.
13546
13999
  if (pm_symbol_node_label_p(node)) {
13547
- pm_token_t operator = not_provided(parser);
13548
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
14000
+ return (pm_node_t *) parse_pattern_hash(parser, node);
13549
14001
  }
13550
14002
 
13551
14003
  if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
@@ -13644,7 +14096,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13644
14096
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13645
14097
 
13646
14098
  bool concating = false;
13647
- bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
14099
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
13648
14100
 
13649
14101
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13650
14102
  pm_node_t *node = NULL;
@@ -13719,7 +14171,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13719
14171
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13720
14172
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13721
14173
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13722
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14174
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13723
14175
  } else if (match1(parser, PM_TOKEN_EOF)) {
13724
14176
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
14177
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -13741,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13741
14193
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
14194
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13743
14195
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13744
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14196
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13745
14197
  } else {
13746
14198
  // If we get here, then we have interpolation so we'll need
13747
14199
  // to create a string or symbol node with interpolation.
@@ -13834,7 +14286,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13834
14286
  * Parse an expression that begins with the previous node that we just lexed.
13835
14287
  */
13836
14288
  static inline pm_node_t *
13837
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
14289
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
13838
14290
  switch (parser->current.type) {
13839
14291
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13840
14292
  parser_lex(parser);
@@ -13866,9 +14318,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
13866
14318
  pm_node_t *expression = NULL;
13867
14319
 
13868
14320
  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
13869
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
13870
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13871
- }
14321
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
13872
14322
  } else {
13873
14323
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13874
14324
  }
@@ -14113,7 +14563,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14113
14563
  if (
14114
14564
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
14115
14565
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14116
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14566
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14567
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14117
14568
  ) {
14118
14569
  pm_arguments_t arguments = { 0 };
14119
14570
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14237,7 +14688,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14237
14688
  // a block, so we need to check for that here.
14238
14689
  if (
14239
14690
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14240
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14691
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14692
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14241
14693
  ) {
14242
14694
  pm_arguments_t arguments = { 0 };
14243
14695
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14250,6 +14702,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14250
14702
 
14251
14703
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14252
14704
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14705
+ } else {
14706
+ // Check if `it` is not going to be assigned.
14707
+ switch (parser->current.type) {
14708
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14709
+ case PM_TOKEN_AMPERSAND_EQUAL:
14710
+ case PM_TOKEN_CARET_EQUAL:
14711
+ case PM_TOKEN_EQUAL:
14712
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
14713
+ case PM_TOKEN_LESS_LESS_EQUAL:
14714
+ case PM_TOKEN_MINUS_EQUAL:
14715
+ case PM_TOKEN_PARENTHESIS_RIGHT:
14716
+ case PM_TOKEN_PERCENT_EQUAL:
14717
+ case PM_TOKEN_PIPE_EQUAL:
14718
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
14719
+ case PM_TOKEN_PLUS_EQUAL:
14720
+ case PM_TOKEN_SLASH_EQUAL:
14721
+ case PM_TOKEN_STAR_EQUAL:
14722
+ case PM_TOKEN_STAR_STAR_EQUAL:
14723
+ break;
14724
+ default:
14725
+ // Once we know it's neither a method call nor an
14726
+ // assignment, we can finally create `it` default
14727
+ // parameter.
14728
+ node = pm_node_check_it(parser, node);
14729
+ }
14253
14730
  }
14254
14731
 
14255
14732
  return node;
@@ -14286,6 +14763,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14286
14763
  // If we get here, then we tried to find something in the
14287
14764
  // heredoc but couldn't actually parse anything, so we'll just
14288
14765
  // return a missing node.
14766
+ //
14767
+ // parse_string_part handles its own errors, so there is no need
14768
+ // for us to add one here.
14289
14769
  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14290
14770
  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14291
14771
  // If we get here, then the part that we parsed was plain string
@@ -14549,11 +15029,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14549
15029
  // for guard clauses in the form of `if` or `unless` statements.
14550
15030
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14551
15031
  pm_token_t keyword = parser->previous;
14552
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
15032
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14553
15033
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14554
15034
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14555
15035
  pm_token_t keyword = parser->previous;
14556
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15036
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14557
15037
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14558
15038
  }
14559
15039
 
@@ -14742,8 +15222,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14742
15222
  pm_token_t operator = parser->previous;
14743
15223
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14744
15224
 
14745
- pm_constant_id_t old_param_name = parser->current_param_name;
14746
- parser->current_param_name = 0;
15225
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14747
15226
  pm_parser_scope_push(parser, true);
14748
15227
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14749
15228
 
@@ -14760,11 +15239,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14760
15239
  }
14761
15240
 
14762
15241
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14763
-
14764
15242
  pm_constant_id_list_t locals = parser->current_scope->locals;
15243
+
14765
15244
  pm_parser_scope_pop(parser);
14766
- parser->current_param_name = old_param_name;
14767
15245
  pm_do_loop_stack_pop(parser);
15246
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15247
+
14768
15248
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14769
15249
  }
14770
15250
 
@@ -14790,9 +15270,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14790
15270
  superclass = NULL;
14791
15271
  }
14792
15272
 
14793
- pm_constant_id_t old_param_name = parser->current_param_name;
14794
- parser->current_param_name = 0;
15273
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14795
15274
  pm_parser_scope_push(parser, true);
15275
+
14796
15276
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14797
15277
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
14798
15278
  } else {
@@ -14818,9 +15298,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14818
15298
  }
14819
15299
 
14820
15300
  pm_constant_id_list_t locals = parser->current_scope->locals;
15301
+
14821
15302
  pm_parser_scope_pop(parser);
14822
- parser->current_param_name = old_param_name;
14823
15303
  pm_do_loop_stack_pop(parser);
15304
+ pm_parser_current_param_name_restore(parser, saved_param_name);
14824
15305
 
14825
15306
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
14826
15307
  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
@@ -14835,18 +15316,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14835
15316
  pm_token_t operator = not_provided(parser);
14836
15317
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14837
15318
 
14838
- // This context is necessary for lexing `...` in a bare params correctly.
14839
- // It must be pushed before lexing the first param, so it is here.
15319
+ // This context is necessary for lexing `...` in a bare params
15320
+ // correctly. It must be pushed before lexing the first param, so it
15321
+ // is here.
14840
15322
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
15323
+ pm_constant_id_t saved_param_name;
15324
+
14841
15325
  parser_lex(parser);
14842
- pm_constant_id_t old_param_name = parser->current_param_name;
14843
15326
 
14844
15327
  switch (parser->current.type) {
14845
15328
  case PM_CASE_OPERATOR:
15329
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14846
15330
  pm_parser_scope_push(parser, true);
14847
- parser->current_param_name = 0;
14848
15331
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14849
15332
  parser_lex(parser);
15333
+
14850
15334
  name = parser->previous;
14851
15335
  break;
14852
15336
  case PM_TOKEN_IDENTIFIER: {
@@ -14854,18 +15338,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14854
15338
 
14855
15339
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14856
15340
  receiver = parse_variable_call(parser);
15341
+ receiver = pm_node_check_it(parser, receiver);
14857
15342
 
15343
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14858
15344
  pm_parser_scope_push(parser, true);
14859
- parser->current_param_name = 0;
14860
15345
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14861
15346
  parser_lex(parser);
14862
15347
 
14863
15348
  operator = parser->previous;
14864
15349
  name = parse_method_definition_name(parser);
14865
15350
  } else {
15351
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14866
15352
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14867
15353
  pm_parser_scope_push(parser, true);
14868
- parser->current_param_name = 0;
15354
+
14869
15355
  name = parser->previous;
14870
15356
  }
14871
15357
 
@@ -14882,9 +15368,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14882
15368
  case PM_TOKEN_KEYWORD___FILE__:
14883
15369
  case PM_TOKEN_KEYWORD___LINE__:
14884
15370
  case PM_TOKEN_KEYWORD___ENCODING__: {
15371
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14885
15372
  pm_parser_scope_push(parser, true);
14886
- parser->current_param_name = 0;
14887
15373
  parser_lex(parser);
15374
+
14888
15375
  pm_token_t identifier = parser->previous;
14889
15376
 
14890
15377
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
@@ -14946,6 +15433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14946
15433
  pm_token_t lparen = parser->previous;
14947
15434
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14948
15435
 
15436
+ accept1(parser, PM_TOKEN_NEWLINE);
14949
15437
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14950
15438
  pm_token_t rparen = parser->previous;
14951
15439
 
@@ -14955,8 +15443,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14955
15443
  operator = parser->previous;
14956
15444
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14957
15445
 
15446
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14958
15447
  pm_parser_scope_push(parser, true);
14959
- parser->current_param_name = 0;
14960
15448
 
14961
15449
  // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason as described above.
14962
15450
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
@@ -14964,8 +15452,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14964
15452
  break;
14965
15453
  }
14966
15454
  default:
15455
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14967
15456
  pm_parser_scope_push(parser, true);
14968
- parser->current_param_name = 0;
15457
+
14969
15458
  name = parse_method_definition_name(parser);
14970
15459
  break;
14971
15460
  }
@@ -15018,8 +15507,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15018
15507
  }
15019
15508
  }
15020
15509
 
15021
- uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
-
15023
15510
  context_pop(parser);
15024
15511
  pm_node_t *statements = NULL;
15025
15512
  pm_token_t equal;
@@ -15080,8 +15567,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15080
15567
  }
15081
15568
 
15082
15569
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
- parser->current_param_name = old_param_name;
15570
+
15084
15571
  pm_parser_scope_pop(parser);
15572
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15573
+
15574
+ /**
15575
+ * If the final character is @, as is the case when defining
15576
+ * methods to override the unary operators, we should ignore
15577
+ * the @ in the same way we do for symbols.
15578
+ */
15579
+ name.end = parse_operator_symbol_name(&name);
15085
15580
 
15086
15581
  return (pm_node_t *) pm_def_node_create(
15087
15582
  parser,
@@ -15090,7 +15585,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15090
15585
  params,
15091
15586
  statements,
15092
15587
  &locals,
15093
- locals_body_index,
15094
15588
  &def_keyword,
15095
15589
  &operator,
15096
15590
  &lparen,
@@ -15309,9 +15803,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15309
15803
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15310
15804
  }
15311
15805
 
15312
- pm_constant_id_t old_param_name = parser->current_param_name;
15313
- parser->current_param_name = 0;
15806
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15314
15807
  pm_parser_scope_push(parser, true);
15808
+
15315
15809
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15316
15810
  pm_node_t *statements = NULL;
15317
15811
 
@@ -15328,7 +15822,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15328
15822
 
15329
15823
  pm_constant_id_list_t locals = parser->current_scope->locals;
15330
15824
  pm_parser_scope_pop(parser);
15331
- parser->current_param_name = old_param_name;
15825
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15332
15826
 
15333
15827
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15334
15828
 
@@ -15914,6 +16408,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15914
16408
  // context of a multiple assignment. We enforce that here. We'll
15915
16409
  // still lex past it though and create a missing node place.
15916
16410
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16411
+ pm_parser_err_previous(parser, diag_id);
15917
16412
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15918
16413
  }
15919
16414
 
@@ -15995,7 +16490,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15995
16490
  parser_lex(parser);
15996
16491
 
15997
16492
  pm_token_t operator = parser->previous;
16493
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15998
16494
  pm_parser_scope_push(parser, false);
16495
+
15999
16496
  pm_block_parameters_node_t *block_parameters;
16000
16497
 
16001
16498
  switch (parser->current.type) {
@@ -16030,12 +16527,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16030
16527
  }
16031
16528
  }
16032
16529
 
16033
- uint32_t locals_body_index = 0;
16034
-
16035
- if (block_parameters) {
16036
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
- }
16038
-
16039
16530
  pm_token_t opening;
16040
16531
  pm_node_t *body = NULL;
16041
16532
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -16070,13 +16561,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16070
16561
 
16071
16562
  if (parameters == NULL && (maximum > 0)) {
16072
16563
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
- locals_body_index = maximum;
16074
16564
  }
16075
16565
 
16076
16566
  pm_constant_id_list_t locals = parser->current_scope->locals;
16567
+
16077
16568
  pm_parser_scope_pop(parser);
16078
16569
  pm_accepts_block_stack_pop(parser);
16079
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
16570
+ pm_parser_current_param_name_restore(parser, saved_param_name);
16571
+
16572
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
16080
16573
  }
16081
16574
  case PM_TOKEN_UPLUS: {
16082
16575
  parser_lex(parser);
@@ -16095,12 +16588,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16095
16588
 
16096
16589
  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16097
16590
  }
16098
- default:
16099
- if (context_recoverable(parser, &parser->current)) {
16591
+ default: {
16592
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
16593
+
16594
+ if (recoverable != PM_CONTEXT_NONE) {
16100
16595
  parser->recovering = true;
16596
+
16597
+ // If the given error is not the generic one, then we'll add it
16598
+ // here because it will provide more context in addition to the
16599
+ // recoverable error that we will also add.
16600
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16601
+ pm_parser_err_previous(parser, diag_id);
16602
+ }
16603
+
16604
+ // If we get here, then we are assuming this token is closing a
16605
+ // parent context, so we'll indicate that to the user so that
16606
+ // they know how we behaved.
16607
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
16608
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
16609
+ // We're going to make a special case here, because "cannot
16610
+ // parse expression" is pretty generic, and we know here that we
16611
+ // have an unexpected token.
16612
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16613
+ } else {
16614
+ pm_parser_err_previous(parser, diag_id);
16101
16615
  }
16102
16616
 
16103
16617
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16618
+ }
16104
16619
  }
16105
16620
  }
16106
16621
 
@@ -17063,15 +17578,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17063
17578
  */
17064
17579
  static pm_node_t *
17065
17580
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17066
- pm_token_t recovery = parser->previous;
17067
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17581
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
17068
17582
 
17069
17583
  switch (PM_NODE_TYPE(node)) {
17070
17584
  case PM_MISSING_NODE:
17071
17585
  // If we found a syntax error, then the type of node returned by
17072
- // parse_expression_prefix is going to be a missing node. In that
17073
- // case we need to add the error message to the parser's error list.
17074
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17586
+ // parse_expression_prefix is going to be a missing node.
17075
17587
  return node;
17076
17588
  case PM_PRE_EXECUTION_NODE:
17077
17589
  case PM_POST_EXECUTION_NODE:
@@ -17080,7 +17592,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17080
17592
  case PM_UNDEF_NODE:
17081
17593
  // These expressions are statements, and cannot be followed by
17082
17594
  // operators (except modifiers).
17083
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17595
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
17084
17596
  return node;
17085
17597
  }
17086
17598
  break;
@@ -17175,9 +17687,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17175
17687
 
17176
17688
  static pm_node_t *
17177
17689
  parse_program(pm_parser_t *parser) {
17178
- pm_parser_scope_push(parser, !parser->current_scope);
17179
- parser_lex(parser);
17690
+ // If the current scope is NULL, then we want to push a new top level scope.
17691
+ // The current scope could exist in the event that we are parsing an eval
17692
+ // and the user has passed in scopes that already exist.
17693
+ if (parser->current_scope == NULL) {
17694
+ pm_parser_scope_push(parser, true);
17695
+ }
17180
17696
 
17697
+ parser_lex(parser);
17181
17698
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
17182
17699
  if (!statements) {
17183
17700
  statements = pm_statements_node_create(parser);
@@ -17248,8 +17765,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17248
17765
  .in_keyword_arg = false,
17249
17766
  .current_param_name = 0,
17250
17767
  .semantic_token_seen = false,
17251
- .frozen_string_literal = false,
17252
- .suppress_warnings = false
17768
+ .frozen_string_literal = false
17253
17769
  };
17254
17770
 
17255
17771
  // Initialize the constant pool. We're going to completely guess as to the
@@ -17295,10 +17811,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17295
17811
  parser->frozen_string_literal = true;
17296
17812
  }
17297
17813
 
17298
- // suppress_warnings option
17299
- if (options->suppress_warnings) {
17300
- parser->suppress_warnings = true;
17301
- }
17814
+ // version option
17815
+ parser->version = options->version;
17302
17816
 
17303
17817
  // scopes option
17304
17818
  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
@@ -17484,3 +17998,297 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
17484
17998
  #undef PM_LOCATION_NODE_VALUE
17485
17999
  #undef PM_LOCATION_NULL_VALUE
17486
18000
  #undef PM_LOCATION_TOKEN_VALUE
18001
+
18002
+ /** A single diagnostic paired with the line/column span that is used to render it when the errors are formatted into the output. */
18003
+ typedef struct {
18004
+ /** A pointer to the diagnostic that was generated during parsing. A NULL pointer marks a slot that has not been filled yet while the array is being sorted. */
18005
+ pm_diagnostic_t *error;
18006
+
18007
+ /** The line that the diagnostic starts on. It is used as a 1-based index (offsets[line - 1]) when the source line is looked up. */
18008
+ uint32_t line;
18009
+
18010
+ /** The column on the start line where the diagnostic begins. */
18011
+ uint32_t column_start;
18012
+
18013
+ /** The column on the start line where the diagnostic ends. When the diagnostic spans several lines it is clamped to the end of the start line, and it is bumped by one if it would equal column_start so that at least one column is marked. */
18014
+ uint32_t column_end;
18015
+ } pm_error_t;
18016
+
18017
+ /** The set of strings used to lay out the formatted errors; which set is chosen depends on the largest line number displayed and on whether output is colorized. */
18018
+ typedef struct {
18019
+ /** The printf-style format used for the line number gutter; it consumes a single uint32_t line number. */
18020
+ const char *number_prefix;
18021
+
18022
+ /** The gutter used on lines that carry no line number (the line holding the carets and the message). */
18023
+ const char *blank_prefix;
18024
+
18025
+ /** The divider that will be used between sections of source code. */
18026
+ const char *divider;
18027
+
18028
+ /** The length of the blank prefix, computed once with strlen. */
18029
+ size_t blank_prefix_length;
18030
+
18031
+ /** The length of the divider, computed once with strlen. */
18032
+ size_t divider_length;
18033
+ } pm_error_format_t;
18034
+
18035
+ #define PM_COLOR_GRAY "\033[38;5;102m"
18036
+ #define PM_COLOR_RED "\033[1;31m"
18037
+ #define PM_COLOR_RESET "\033[0m"
18038
+
18039
+ static inline pm_error_t *
18040
+ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18041
+ pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
18042
+
18043
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
18044
+ pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
18045
+ pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
18046
+
18047
+ // We're going to insert this error into the array in sorted order. We
18048
+ // do this by finding the first error that has a line number greater
18049
+ // than the current error and then inserting the current error before
18050
+ // that one.
18051
+ size_t index = 0;
18052
+ while (
18053
+ (index < error_list->size) &&
18054
+ (errors[index].error != NULL) &&
18055
+ (
18056
+ (errors[index].line < ((uint32_t) start.line)) ||
18057
+ (errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
18058
+ )
18059
+ ) index++;
18060
+
18061
+ // Now we're going to shift all of the errors after this one down one
18062
+ // index to make room for the new error.
18063
+ memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18064
+
18065
+ // Finally, we'll insert the error into the array.
18066
+ uint32_t column_end;
18067
+ if (start.line == end.line) {
18068
+ column_end = (uint32_t) end.column;
18069
+ } else {
18070
+ column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
18071
+ }
18072
+
18073
+ // Ensure we have at least one column of error.
18074
+ if (((uint32_t) start.column) == column_end) column_end++;
18075
+
18076
+ errors[index] = (pm_error_t) {
18077
+ .error = error,
18078
+ .line = (uint32_t) start.line,
18079
+ .column_start = (uint32_t) start.column,
18080
+ .column_end = column_end
18081
+ };
18082
+ }
18083
+
18084
+ return errors;
18085
+ }
18086
+
18087
+ static inline void
18088
+ pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
18089
+ const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
18090
+ const uint8_t *end;
18091
+
18092
+ if (line >= newline_list->size) {
18093
+ end = parser->end;
18094
+ } else {
18095
+ end = &parser->start[newline_list->offsets[line]];
18096
+ }
18097
+
18098
+ pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
18099
+ pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
18100
+
18101
+ if (end == parser->end && end[-1] != '\n') {
18102
+ pm_buffer_append_string(buffer, "\n", 1);
18103
+ }
18104
+ }
18105
+
18106
+ /**
18107
+ * Format the errors on the parser into the given buffer.
18108
+ */
18109
+ PRISM_EXPORTED_FUNCTION void
18110
+ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
18111
+ const pm_list_t *error_list = &parser->error_list;
18112
+ assert(error_list->size != 0);
18113
+
18114
+ // First, we're going to sort all of the errors by line number using an
18115
+ // insertion sort into a newly allocated array.
18116
+ const pm_newline_list_t *newline_list = &parser->newline_list;
18117
+ pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
18118
+
18119
+ // Now we're going to determine how we're going to format line numbers and
18120
+ // blank lines based on the maximum number of digits in the line numbers
18121
+ // that are going to be displayed.
18122
+ pm_error_format_t error_format;
18123
+ size_t max_line_number = errors[error_list->size - 1].line;
18124
+
18125
+ if (max_line_number < 10) {
18126
+ if (colorize) {
18127
+ error_format = (pm_error_format_t) {
18128
+ .number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
18129
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18130
+ .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
18131
+ };
18132
+ } else {
18133
+ error_format = (pm_error_format_t) {
18134
+ .number_prefix = "%1" PRIu32 " | ",
18135
+ .blank_prefix = " | ",
18136
+ .divider = " ~~~~~\n"
18137
+ };
18138
+ }
18139
+ } else if (max_line_number < 100) {
18140
+ if (colorize) {
18141
+ error_format = (pm_error_format_t) {
18142
+ .number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
18143
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18144
+ .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
18145
+ };
18146
+ } else {
18147
+ error_format = (pm_error_format_t) {
18148
+ .number_prefix = "%2" PRIu32 " | ",
18149
+ .blank_prefix = " | ",
18150
+ .divider = " ~~~~~~\n"
18151
+ };
18152
+ }
18153
+ } else if (max_line_number < 1000) {
18154
+ if (colorize) {
18155
+ error_format = (pm_error_format_t) {
18156
+ .number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
18157
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18158
+ .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
18159
+ };
18160
+ } else {
18161
+ error_format = (pm_error_format_t) {
18162
+ .number_prefix = "%3" PRIu32 " | ",
18163
+ .blank_prefix = " | ",
18164
+ .divider = " ~~~~~~~\n"
18165
+ };
18166
+ }
18167
+ } else if (max_line_number < 10000) {
18168
+ if (colorize) {
18169
+ error_format = (pm_error_format_t) {
18170
+ .number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
18171
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18172
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18173
+ };
18174
+ } else {
18175
+ error_format = (pm_error_format_t) {
18176
+ .number_prefix = "%4" PRIu32 " | ",
18177
+ .blank_prefix = " | ",
18178
+ .divider = " ~~~~~~~~\n"
18179
+ };
18180
+ }
18181
+ } else {
18182
+ if (colorize) {
18183
+ error_format = (pm_error_format_t) {
18184
+ .number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
18185
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18186
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18187
+ };
18188
+ } else {
18189
+ error_format = (pm_error_format_t) {
18190
+ .number_prefix = "%5" PRIu32 " | ",
18191
+ .blank_prefix = " | ",
18192
+ .divider = " ~~~~~~~~\n"
18193
+ };
18194
+ }
18195
+ }
18196
+
18197
+ error_format.blank_prefix_length = strlen(error_format.blank_prefix);
18198
+ error_format.divider_length = strlen(error_format.divider);
18199
+
18200
+ // Now we're going to iterate through every error in our error list and
18201
+ // display it. While we're iterating, we will display some padding lines of
18202
+ // the source before the error to give some context. We'll be careful not to
18203
+ // display the same line twice in case the errors are close enough in the
18204
+ // source.
18205
+ uint32_t last_line = 0;
18206
+ const pm_encoding_t *encoding = parser->encoding;
18207
+
18208
+ for (size_t index = 0; index < error_list->size; index++) {
18209
+ pm_error_t *error = &errors[index];
18210
+
18211
+ // Here we determine how many lines of padding of the source to display,
18212
+ // based on the difference from the last line that was displayed.
18213
+ if (error->line - last_line > 1) {
18214
+ if (error->line - last_line > 2) {
18215
+ if ((index != 0) && (error->line - last_line > 3)) {
18216
+ pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
18217
+ }
18218
+
18219
+ pm_buffer_append_string(buffer, " ", 2);
18220
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
18221
+ }
18222
+
18223
+ pm_buffer_append_string(buffer, " ", 2);
18224
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
18225
+ }
18226
+
18227
+ // If this is the first error or we're on a new line, then we'll display
18228
+ // the line that has the error in it.
18229
+ if ((index == 0) || (error->line != last_line)) {
18230
+ if (colorize) {
18231
+ pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
18232
+ } else {
18233
+ pm_buffer_append_string(buffer, "> ", 2);
18234
+ }
18235
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
18236
+ }
18237
+
18238
+ // Now we'll display the actual error message. We'll do this by first
18239
+ // putting the prefix to the line, then a bunch of blank spaces
18240
+ // depending on the column, then as many carets as we need to display
18241
+ // the width of the error, then the error message itself.
18242
+ //
18243
+ // Note that this doesn't take into account the width of the actual
18244
+ // character when displayed in the terminal. For some East Asian
18245
+ // languages or emoji, this means it can be thrown off pretty badly. We
18246
+ // will need to solve this eventually.
18247
+ pm_buffer_append_string(buffer, " ", 2);
18248
+ pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
18249
+
18250
+ size_t column = 0;
18251
+ const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
18252
+
18253
+ while (column < error->column_end) {
18254
+ if (column < error->column_start) {
18255
+ pm_buffer_append_byte(buffer, ' ');
18256
+ } else if (colorize) {
18257
+ pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
18258
+ } else {
18259
+ pm_buffer_append_byte(buffer, '^');
18260
+ }
18261
+
18262
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
18263
+ column += (char_width == 0 ? 1 : char_width);
18264
+ }
18265
+
18266
+ pm_buffer_append_byte(buffer, ' ');
18267
+
18268
+ const char *message = error->error->message;
18269
+ pm_buffer_append_string(buffer, message, strlen(message));
18270
+ pm_buffer_append_byte(buffer, '\n');
18271
+
18272
+ // Here we determine how many lines of padding to display after the
18273
+ // error, depending on where the next error is in source.
18274
+ last_line = error->line;
18275
+ size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
18276
+
18277
+ if (next_line - last_line > 1) {
18278
+ pm_buffer_append_string(buffer, " ", 2);
18279
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18280
+ }
18281
+
18282
+ if (next_line - last_line > 1) {
18283
+ pm_buffer_append_string(buffer, " ", 2);
18284
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18285
+ }
18286
+ }
18287
+
18288
+ // Finally, we'll free the array of errors that we allocated.
18289
+ free(errors);
18290
+ }
18291
+
18292
+ #undef PM_COLOR_GRAY
18293
+ #undef PM_COLOR_RED
18294
+ #undef PM_COLOR_RESET