prism 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/prism.c CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
164
164
 
165
165
  PRISM_ATTRIBUTE_UNUSED static void
166
166
  debug_token(pm_token_t * token) {
167
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
167
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
168
168
  }
169
169
 
170
170
  #endif
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
423
423
  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
424
424
  }
425
425
 
426
+ static inline bool
427
+ lex_state_arg_labeled_p(pm_parser_t *parser) {
428
+ return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
429
+ }
430
+
426
431
  static inline bool
427
432
  lex_state_arg_p(pm_parser_t *parser) {
428
433
  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
548
553
  */
549
554
  static inline void
550
555
  pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
551
- if (!parser->suppress_warnings) {
552
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
553
- }
556
+ pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
554
557
  }
555
558
 
556
559
  /**
@@ -813,6 +816,9 @@ typedef struct {
813
816
 
814
817
  /** The optional block attached to the call. */
815
818
  pm_node_t *block;
819
+
820
+ /** The flag indicating whether this arguments list has forwarding argument. */
821
+ bool has_forwarding;
816
822
  } pm_arguments_t;
817
823
 
818
824
  /**
@@ -884,6 +890,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
890
  node->flags &= (pm_node_flags_t) ~flag;
885
891
  }
886
892
 
893
+ /**
894
+ * Set the repeated parameter flag on the given node.
895
+ */
896
+ static inline void
897
+ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
898
+ assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
899
+ PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
900
+ PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
901
+ PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
902
+ PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
903
+ PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
904
+ PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
905
+ PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
906
+
907
+ pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
908
+ }
887
909
 
888
910
  /******************************************************************************/
889
911
  /* Node creation functions */
@@ -977,7 +999,7 @@ static inline void *
977
999
  pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
978
1000
  void *memory = calloc(1, size);
979
1001
  if (memory == NULL) {
980
- fprintf(stderr, "Failed to allocate %zu bytes\n", size);
1002
+ fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
981
1003
  abort();
982
1004
  }
983
1005
  return memory;
@@ -1325,7 +1347,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1325
1347
  pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
1326
1348
  const uint8_t *end;
1327
1349
 
1328
- if (value != NULL) {
1350
+ if (value != NULL && value->location.end > key->location.end) {
1329
1351
  end = value->location.end;
1330
1352
  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
1331
1353
  end = operator->end;
@@ -1333,6 +1355,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1333
1355
  end = key->location.end;
1334
1356
  }
1335
1357
 
1358
+ // Hash string keys will be frozen, so we can mark them as frozen here so
1359
+ // that the compiler picks them up and also when we check for static literal
1360
+ // on the keys it gets factored in.
1361
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
1362
+ key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
1363
+ }
1364
+
1336
1365
  // If the key and value of this assoc node are both static literals, then
1337
1366
  // we can mark this node as a static literal.
1338
1367
  pm_node_flags_t flags = 0;
@@ -1490,7 +1519,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1490
1519
  * Allocate and initialize a new BlockNode node.
1491
1520
  */
1492
1521
  static pm_block_node_t *
1493
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1522
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1494
1523
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1495
1524
 
1496
1525
  *node = (pm_block_node_t) {
@@ -1499,7 +1528,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
1499
1528
  .location = { .start = opening->start, .end = closing->end },
1500
1529
  },
1501
1530
  .locals = *locals,
1502
- .locals_body_index = locals_body_index,
1503
1531
  .parameters = parameters,
1504
1532
  .body = body,
1505
1533
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1645,12 +1673,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
1645
1673
  * in the various specializations of this function.
1646
1674
  */
1647
1675
  static pm_call_node_t *
1648
- pm_call_node_create(pm_parser_t *parser) {
1676
+ pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
1649
1677
  pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
1650
1678
 
1651
1679
  *node = (pm_call_node_t) {
1652
1680
  {
1653
1681
  .type = PM_CALL_NODE,
1682
+ .flags = flags,
1654
1683
  .location = PM_LOCATION_NULL_VALUE(parser),
1655
1684
  },
1656
1685
  .receiver = NULL,
@@ -1666,6 +1695,15 @@ pm_call_node_create(pm_parser_t *parser) {
1666
1695
  return node;
1667
1696
  }
1668
1697
 
1698
+ /**
1699
+ * Returns the value that the ignore visibility flag should be set to for the
1700
+ * given receiver.
1701
+ */
1702
+ static inline pm_node_flags_t
1703
+ pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
1704
+ return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
1705
+ }
1706
+
1669
1707
  /**
1670
1708
  * Allocate and initialize a new CallNode node from an aref or an aset
1671
1709
  * expression.
@@ -1674,7 +1712,7 @@ static pm_call_node_t *
1674
1712
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1675
1713
  pm_assert_value_expression(parser, receiver);
1676
1714
 
1677
- pm_call_node_t *node = pm_call_node_create(parser);
1715
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1678
1716
 
1679
1717
  node->base.location.start = receiver->location.start;
1680
1718
  node->base.location.end = pm_arguments_end(arguments);
@@ -1700,7 +1738,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1700
1738
  pm_assert_value_expression(parser, receiver);
1701
1739
  pm_assert_value_expression(parser, argument);
1702
1740
 
1703
- pm_call_node_t *node = pm_call_node_create(parser);
1741
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1704
1742
 
1705
1743
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
1706
1744
  node->base.location.end = MAX(receiver->location.end, argument->location.end);
@@ -1723,7 +1761,7 @@ static pm_call_node_t *
1723
1761
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1724
1762
  pm_assert_value_expression(parser, receiver);
1725
1763
 
1726
- pm_call_node_t *node = pm_call_node_create(parser);
1764
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1727
1765
 
1728
1766
  node->base.location.start = receiver->location.start;
1729
1767
  const uint8_t *end = pm_arguments_end(arguments);
@@ -1754,7 +1792,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1754
1792
  */
1755
1793
  static pm_call_node_t *
1756
1794
  pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
1757
- pm_call_node_t *node = pm_call_node_create(parser);
1795
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1758
1796
 
1759
1797
  node->base.location.start = message->start;
1760
1798
  node->base.location.end = pm_arguments_end(arguments);
@@ -1776,7 +1814,7 @@ static pm_call_node_t *
1776
1814
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1777
1815
  pm_assert_value_expression(parser, receiver);
1778
1816
 
1779
- pm_call_node_t *node = pm_call_node_create(parser);
1817
+ pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
1780
1818
 
1781
1819
  node->base.location.start = message->start;
1782
1820
  if (arguments->closing_loc.start != NULL) {
@@ -1802,7 +1840,7 @@ static pm_call_node_t *
1802
1840
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1803
1841
  pm_assert_value_expression(parser, receiver);
1804
1842
 
1805
- pm_call_node_t *node = pm_call_node_create(parser);
1843
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1806
1844
 
1807
1845
  node->base.location.start = receiver->location.start;
1808
1846
  node->base.location.end = pm_arguments_end(arguments);
@@ -1829,7 +1867,7 @@ static pm_call_node_t *
1829
1867
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1830
1868
  pm_assert_value_expression(parser, receiver);
1831
1869
 
1832
- pm_call_node_t *node = pm_call_node_create(parser);
1870
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1833
1871
 
1834
1872
  node->base.location.start = operator->start;
1835
1873
  node->base.location.end = receiver->location.end;
@@ -1847,7 +1885,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
1847
1885
  */
1848
1886
  static pm_call_node_t *
1849
1887
  pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1850
- pm_call_node_t *node = pm_call_node_create(parser);
1888
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1851
1889
 
1852
1890
  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
1853
1891
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
@@ -2167,11 +2205,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2167
2205
  static pm_index_target_node_t *
2168
2206
  pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
2207
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2208
+ pm_node_flags_t flags = target->base.flags;
2170
2209
 
2171
2210
  *node = (pm_index_target_node_t) {
2172
2211
  {
2173
2212
  .type = PM_INDEX_TARGET_NODE,
2174
- .flags = target->base.flags,
2213
+ .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
2175
2214
  .location = target->base.location
2176
2215
  },
2177
2216
  .receiver = target->receiver,
@@ -2701,6 +2740,45 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2701
2740
  return node;
2702
2741
  }
2703
2742
 
2743
+ /**
2744
+ * Check if the receiver of a `def` node is allowed.
2745
+ */
2746
+ static void
2747
+ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2748
+ switch (receiver->type) {
2749
+ case PM_BEGIN_NODE: {
2750
+ pm_begin_node_t *begin_node = (pm_begin_node_t *)receiver;
2751
+ pm_check_def_receiver(parser, (pm_node_t *) begin_node->statements);
2752
+ break;
2753
+ }
2754
+ case PM_PARENTHESES_NODE:
2755
+ pm_check_def_receiver(parser, ((pm_parentheses_node_t *) receiver)->body);
2756
+ break;
2757
+ case PM_STATEMENTS_NODE: {
2758
+ pm_statements_node_t *statements_node = (pm_statements_node_t *)receiver;
2759
+ pm_check_def_receiver(parser, statements_node->body.nodes[statements_node->body.size - 1]);
2760
+ break;
2761
+ }
2762
+ case PM_ARRAY_NODE:
2763
+ case PM_FLOAT_NODE:
2764
+ case PM_IMAGINARY_NODE:
2765
+ case PM_INTEGER_NODE:
2766
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
2767
+ case PM_INTERPOLATED_STRING_NODE:
2768
+ case PM_INTERPOLATED_SYMBOL_NODE:
2769
+ case PM_INTERPOLATED_X_STRING_NODE:
2770
+ case PM_RATIONAL_NODE:
2771
+ case PM_REGULAR_EXPRESSION_NODE:
2772
+ case PM_SOURCE_ENCODING_NODE:
2773
+ case PM_SOURCE_FILE_NODE:
2774
+ case PM_SOURCE_LINE_NODE:
2775
+ case PM_STRING_NODE:
2776
+ case PM_SYMBOL_NODE:
2777
+ case PM_X_STRING_NODE:
2778
+ pm_parser_err_node(parser, receiver, PM_ERR_SINGLETON_FOR_LITERALS);
2779
+ }
2780
+ }
2781
+
2704
2782
  /**
2705
2783
  * Allocate and initialize a new DefNode node.
2706
2784
  */
@@ -2712,7 +2790,6 @@ pm_def_node_create(
2712
2790
  pm_parameters_node_t *parameters,
2713
2791
  pm_node_t *body,
2714
2792
  pm_constant_id_list_t *locals,
2715
- uint32_t locals_body_index,
2716
2793
  const pm_token_t *def_keyword,
2717
2794
  const pm_token_t *operator,
2718
2795
  const pm_token_t *lparen,
@@ -2729,6 +2806,10 @@ pm_def_node_create(
2729
2806
  end = end_keyword->end;
2730
2807
  }
2731
2808
 
2809
+ if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2810
+ pm_check_def_receiver(parser, receiver);
2811
+ }
2812
+
2732
2813
  *node = (pm_def_node_t) {
2733
2814
  {
2734
2815
  .type = PM_DEF_NODE,
@@ -2740,7 +2821,6 @@ pm_def_node_create(
2740
2821
  .parameters = parameters,
2741
2822
  .body = body,
2742
2823
  .locals = *locals,
2743
- .locals_body_index = locals_body_index,
2744
2824
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2745
2825
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2746
2826
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3962,9 +4042,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3962
4042
  */
3963
4043
  static void
3964
4044
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
- // If the element being added is not an AssocNode or does not have a symbol key, then
3966
- // we want to turn the STATIC_KEYS flag off.
3967
- // TODO: Rename the flag to SYMBOL_KEYS instead.
4045
+ // If the element being added is not an AssocNode or does not have a symbol
4046
+ // key, then we want to turn the SYMBOL_KEYS flag off.
3968
4047
  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
4048
  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
4049
  }
@@ -4051,7 +4130,6 @@ static pm_lambda_node_t *
4051
4130
  pm_lambda_node_create(
4052
4131
  pm_parser_t *parser,
4053
4132
  pm_constant_id_list_t *locals,
4054
- uint32_t locals_body_index,
4055
4133
  const pm_token_t *operator,
4056
4134
  const pm_token_t *opening,
4057
4135
  const pm_token_t *closing,
@@ -4069,7 +4147,6 @@ pm_lambda_node_create(
4069
4147
  },
4070
4148
  },
4071
4149
  .locals = *locals,
4072
- .locals_body_index = locals_body_index,
4073
4150
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4074
4151
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4075
4152
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4161,12 +4238,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4161
4238
  }
4162
4239
 
4163
4240
  /**
4164
- * Allocate a new LocalVariableReadNode node.
4241
+ * Allocate a new LocalVariableReadNode node with constant_id.
4165
4242
  */
4166
4243
  static pm_local_variable_read_node_t *
4167
- pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
- pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
-
4244
+ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
4170
4245
  if (parser->current_param_name == name_id) {
4171
4246
  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
4247
  }
@@ -4185,6 +4260,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4185
4260
  return node;
4186
4261
  }
4187
4262
 
4263
+ /**
4264
+ * Allocate a new LocalVariableReadNode node.
4265
+ */
4266
+ static pm_local_variable_read_node_t *
4267
+ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4268
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4269
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
4270
+ }
4271
+
4188
4272
  /**
4189
4273
  * Allocate and initialize a new LocalVariableWriteNode node.
4190
4274
  */
@@ -4210,6 +4294,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
4210
4294
  return node;
4211
4295
  }
4212
4296
 
4297
+ /**
4298
+ * Returns true if the given bounds comprise `it`.
4299
+ */
4300
+ static inline bool
4301
+ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
4302
+ return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
4303
+ }
4304
+
4305
+ /**
4306
+ * Returns true if the given node is `it` default parameter.
4307
+ */
4308
+ static inline bool
4309
+ pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
4310
+ // Check if it's a local variable reference
4311
+ if (node->type != PM_CALL_NODE) {
4312
+ return false;
4313
+ }
4314
+
4315
+ // Check if it's a variable call
4316
+ pm_call_node_t *call_node = (pm_call_node_t *) node;
4317
+ if (!pm_call_node_variable_call_p(call_node)) {
4318
+ return false;
4319
+ }
4320
+
4321
+ // Check if it's called `it`
4322
+ pm_constant_id_t id = ((pm_call_node_t *)node)->name;
4323
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
4324
+ return pm_token_is_it(constant->start, constant->start + constant->length);
4325
+ }
4326
+
4327
+ /**
4328
+ * Convert a `it` variable call node to a node for `it` default parameter.
4329
+ */
4330
+ static pm_node_t *
4331
+ pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
4332
+ if (
4333
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
4334
+ !parser->current_scope->closed &&
4335
+ pm_node_is_it(parser, node)
4336
+ ) {
4337
+ if (parser->current_scope->explicit_params) {
4338
+ pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
4339
+ } else {
4340
+ pm_node_destroy(parser, node);
4341
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
4342
+ node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
4343
+ }
4344
+ }
4345
+ return node;
4346
+ }
4347
+
4213
4348
  /**
4214
4349
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
4215
4350
  * are of the form /^_\d$/).
@@ -5372,18 +5507,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
5372
5507
  return node;
5373
5508
  }
5374
5509
 
5510
+ /**
5511
+ * Read through the contents of a string and check if it consists solely of US ASCII code points.
5512
+ */
5513
+ static bool
5514
+ pm_ascii_only_p(const pm_string_t *contents) {
5515
+ const size_t length = pm_string_length(contents);
5516
+ const uint8_t *source = pm_string_source(contents);
5517
+
5518
+ for (size_t index = 0; index < length; index++) {
5519
+ if (source[index] & 0x80) return false;
5520
+ }
5521
+
5522
+ return true;
5523
+ }
5524
+
5525
+ /**
5526
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
5527
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
5528
+ * points. Otherwise, the encoding may be explicitly set with an escape
5529
+ * sequence.
5530
+ */
5531
+ static inline pm_node_flags_t
5532
+ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
5533
+ if (parser->explicit_encoding != NULL) {
5534
+ // A Symbol may optionally have its encoding explicitly set. This will
5535
+ // happen if an escape sequence results in a non-ASCII code point.
5536
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
5537
+ return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
5538
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
5539
+ return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
5540
+ }
5541
+ } else if (pm_ascii_only_p(contents)) {
5542
+ // Ruby stipulates that all source files must use an ASCII-compatible
5543
+ // encoding. Thus, all symbols appearing in source are eligible for
5544
+ // "downgrading" to US-ASCII.
5545
+ return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
5546
+ }
5547
+
5548
+ return 0;
5549
+ }
5550
+
5375
5551
  /**
5376
5552
  * Allocate and initialize a new SymbolNode node with the given unescaped
5377
5553
  * string.
5378
5554
  */
5379
5555
  static pm_symbol_node_t *
5380
- pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
5556
+ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
5381
5557
  pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5382
5558
 
5383
5559
  *node = (pm_symbol_node_t) {
5384
5560
  {
5385
5561
  .type = PM_SYMBOL_NODE,
5386
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5562
+ .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
5387
5563
  .location = {
5388
5564
  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
5389
5565
  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
@@ -5403,7 +5579,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5403
5579
  */
5404
5580
  static inline pm_symbol_node_t *
5405
5581
  pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5406
- return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
5582
+ return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
5407
5583
  }
5408
5584
 
5409
5585
  /**
@@ -5411,7 +5587,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
5411
5587
  */
5412
5588
  static pm_symbol_node_t *
5413
5589
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5414
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
5590
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
5415
5591
  parser->current_string = PM_STRING_EMPTY;
5416
5592
  return node;
5417
5593
  }
@@ -5433,6 +5609,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
5433
5609
 
5434
5610
  assert((label.end - label.start) >= 0);
5435
5611
  pm_string_shared_init(&node->unescaped, label.start, label.end);
5612
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
5613
+
5436
5614
  break;
5437
5615
  }
5438
5616
  case PM_TOKEN_MISSING: {
@@ -5495,6 +5673,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
5495
5673
  .unescaped = node->unescaped
5496
5674
  };
5497
5675
 
5676
+ pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
5677
+
5498
5678
  // We are explicitly _not_ using pm_node_destroy here because we don't want
5499
5679
  // to trash the unescaped string. We could instead copy the string if we
5500
5680
  // know that it is owned, but we're taking the fast path for now.
@@ -5885,6 +6065,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5885
6065
  .closed = closed,
5886
6066
  .explicit_params = false,
5887
6067
  .numbered_parameters = 0,
6068
+ .forwarding_params = 0,
5888
6069
  };
5889
6070
 
5890
6071
  pm_constant_id_list_init(&scope->locals);
@@ -5893,6 +6074,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5893
6074
  return true;
5894
6075
  }
5895
6076
 
6077
+ static void
6078
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
6079
+ {
6080
+ pm_scope_t *scope = parser->current_scope;
6081
+ while (scope) {
6082
+ if (scope->forwarding_params & mask) {
6083
+ if (!scope->closed) {
6084
+ pm_parser_err_token(parser, token, diag);
6085
+ return;
6086
+ }
6087
+ return;
6088
+ }
6089
+ if (scope->closed) break;
6090
+ scope = scope->previous;
6091
+ }
6092
+
6093
+ pm_parser_err_token(parser, token, diag);
6094
+ }
6095
+
6096
+ static inline void
6097
+ pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
6098
+ {
6099
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
6100
+ }
6101
+
6102
+ static void
6103
+ pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
6104
+ {
6105
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
6106
+ }
6107
+
6108
+ static inline void
6109
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
6110
+ {
6111
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
6112
+ }
6113
+
6114
+ static inline void
6115
+ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
6116
+ {
6117
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
6118
+ }
6119
+
6120
+ /**
6121
+ * Save the current param name as the return value and set it to the given
6122
+ * constant id.
6123
+ */
6124
+ static inline pm_constant_id_t
6125
+ pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
6126
+ pm_constant_id_t saved_param_name = parser->current_param_name;
6127
+ parser->current_param_name = current_param_name;
6128
+ return saved_param_name;
6129
+ }
6130
+
6131
+ /**
6132
+ * Save the current param name as the return value and clear it.
6133
+ */
6134
+ static inline pm_constant_id_t
6135
+ pm_parser_current_param_name_unset(pm_parser_t *parser) {
6136
+ return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
6137
+ }
6138
+
6139
+ /**
6140
+ * Restore the current param name from the given value.
6141
+ */
6142
+ static inline void
6143
+ pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
6144
+ parser->current_param_name = saved_param_name;
6145
+ }
6146
+
5896
6147
  /**
5897
6148
  * Check if any of the currently visible scopes contain a local variable
5898
6149
  * described by the given constant id.
@@ -5972,23 +6223,28 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
5972
6223
  /**
5973
6224
  * Add a parameter name to the current scope and check whether the name of the
5974
6225
  * parameter is unique or not.
6226
+ *
6227
+ * Returns `true` if this is a duplicate parameter name, otherwise returns
6228
+ * false.
5975
6229
  */
5976
- static void
6230
+ static bool
5977
6231
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5978
6232
  // We want to check whether the parameter name is a numbered parameter or
5979
6233
  // not.
5980
6234
  pm_refute_numbered_parameter(parser, name->start, name->end);
5981
6235
 
5982
- // We want to ignore any parameter name that starts with an underscore.
5983
- if ((name->start < name->end) && (*name->start == '_')) return;
5984
-
5985
6236
  // Otherwise we'll fetch the constant id for the parameter name and check
5986
6237
  // whether it's already in the current scope.
5987
6238
  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
5988
6239
 
5989
6240
  if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5990
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6241
+ // Add an error if the parameter doesn't start with _ and has been seen before
6242
+ if ((name->start < name->end) && (*name->start != '_')) {
6243
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6244
+ }
6245
+ return true;
5991
6246
  }
6247
+ return false;
5992
6248
  }
5993
6249
 
5994
6250
  /**
@@ -6029,7 +6285,7 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6029
6285
  } else if (*b < 0x80) {
6030
6286
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6031
6287
  } else {
6032
- return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
6288
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
6033
6289
  }
6034
6290
  }
6035
6291
 
@@ -6042,7 +6298,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
6298
  if (*b < 0x80) {
6043
6299
  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
6300
  } else {
6045
- return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6301
+ return pm_encoding_utf_8_char_width(b, end - b);
6046
6302
  }
6047
6303
  }
6048
6304
 
@@ -6317,8 +6573,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6317
6573
  */
6318
6574
  static void
6319
6575
  parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6320
- if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6576
+ if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6321
6577
  parser->frozen_string_literal = true;
6578
+ } else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
6579
+ parser->frozen_string_literal = false;
6322
6580
  }
6323
6581
  }
6324
6582
 
@@ -6541,21 +6799,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6541
6799
  return token->type == PM_TOKEN_BRACE_RIGHT;
6542
6800
  case PM_CONTEXT_PREDICATE:
6543
6801
  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6802
+ case PM_CONTEXT_NONE:
6803
+ return false;
6544
6804
  }
6545
6805
 
6546
6806
  return false;
6547
6807
  }
6548
6808
 
6549
- static bool
6550
- context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6809
+ /**
6810
+ * Returns the context that the given token is found to be terminating, or
6811
+ * returns PM_CONTEXT_NONE.
6812
+ */
6813
+ static pm_context_t
6814
+ context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
6551
6815
  pm_context_node_t *context_node = parser->current_context;
6552
6816
 
6553
6817
  while (context_node != NULL) {
6554
- if (context_terminator(context_node->context, token)) return true;
6818
+ if (context_terminator(context_node->context, token)) return context_node->context;
6555
6819
  context_node = context_node->prev;
6556
6820
  }
6557
6821
 
6558
- return false;
6822
+ return PM_CONTEXT_NONE;
6559
6823
  }
6560
6824
 
6561
6825
  static bool
@@ -6583,7 +6847,7 @@ context_pop(pm_parser_t *parser) {
6583
6847
  }
6584
6848
 
6585
6849
  static bool
6586
- context_p(pm_parser_t *parser, pm_context_t context) {
6850
+ context_p(const pm_parser_t *parser, pm_context_t context) {
6587
6851
  pm_context_node_t *context_node = parser->current_context;
6588
6852
 
6589
6853
  while (context_node != NULL) {
@@ -6595,7 +6859,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
6595
6859
  }
6596
6860
 
6597
6861
  static bool
6598
- context_def_p(pm_parser_t *parser) {
6862
+ context_def_p(const pm_parser_t *parser) {
6599
6863
  pm_context_node_t *context_node = parser->current_context;
6600
6864
 
6601
6865
  while (context_node != NULL) {
@@ -6618,6 +6882,55 @@ context_def_p(pm_parser_t *parser) {
6618
6882
  return false;
6619
6883
  }
6620
6884
 
6885
+ /**
6886
+ * Returns a human readable string for the given context, used in error
6887
+ * messages.
6888
+ */
6889
+ static const char *
6890
+ context_human(pm_context_t context) {
6891
+ switch (context) {
6892
+ case PM_CONTEXT_NONE:
6893
+ assert(false && "unreachable");
6894
+ return "";
6895
+ case PM_CONTEXT_BEGIN: return "begin statement";
6896
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
6897
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
6898
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
6899
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
6900
+ case PM_CONTEXT_CLASS: return "class definition";
6901
+ case PM_CONTEXT_DEF: return "method definition";
6902
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
6903
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
6904
+ case PM_CONTEXT_ELSE: return "'else' clause";
6905
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
6906
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
6907
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
6908
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
6909
+ case PM_CONTEXT_FOR: return "for loop";
6910
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
6911
+ case PM_CONTEXT_IF: return "if statement";
6912
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
6913
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
6914
+ case PM_CONTEXT_MAIN: return "top level context";
6915
+ case PM_CONTEXT_MODULE: return "module definition";
6916
+ case PM_CONTEXT_PARENS: return "parentheses";
6917
+ case PM_CONTEXT_POSTEXE: return "'END' block";
6918
+ case PM_CONTEXT_PREDICATE: return "predicate";
6919
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
6920
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
6921
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
6922
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
6923
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
6924
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
6925
+ case PM_CONTEXT_UNLESS: return "unless statement";
6926
+ case PM_CONTEXT_UNTIL: return "until statement";
6927
+ case PM_CONTEXT_WHILE: return "while statement";
6928
+ }
6929
+
6930
+ assert(false && "unreachable");
6931
+ return "";
6932
+ }
6933
+
6621
6934
  /******************************************************************************/
6622
6935
  /* Specific token lexers */
6623
6936
  /******************************************************************************/
@@ -7982,7 +8295,6 @@ pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
7982
8295
  /**
7983
8296
  * When we're about to return from lexing the current token and we know for sure
7984
8297
  * that we have found an escape sequence, this function is called to copy the
7985
- *
7986
8298
  * contents of the token buffer into the current string on the parser so that it
7987
8299
  * can be attached to the correct node.
7988
8300
  */
@@ -7997,7 +8309,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
7997
8309
  * string. If we haven't pushed anything into the buffer, this means that we
7998
8310
  * never found an escape sequence, so we can directly reference the bounds of
7999
8311
  * the current string. Either way, at the return of this function it is expected
8000
- *
8001
8312
  * that parser->current_string is established in such a way that it can be
8002
8313
  * attached to a node.
8003
8314
  */
@@ -8016,7 +8327,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8016
8327
  * point into the buffer because we're about to provide a string that has
8017
8328
  * different content than a direct slice of the source.
8018
8329
  *
8019
- *
8020
8330
  * It is expected that the parser's current token end will be pointing at one
8021
8331
  * byte past the backslash that starts the escape sequence.
8022
8332
  */
@@ -8070,6 +8380,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
8070
8380
  return whitespace;
8071
8381
  }
8072
8382
 
8383
+ /**
8384
+ * Lex past the delimiter of a percent literal. Handle newlines and heredocs
8385
+ * appropriately.
8386
+ */
8387
+ static uint8_t
8388
+ pm_lex_percent_delimiter(pm_parser_t *parser) {
8389
+ size_t eol_length = match_eol(parser);
8390
+
8391
+ if (eol_length) {
8392
+ if (parser->heredoc_end) {
8393
+ // If we have already lexed a heredoc, then the newline has already
8394
+ // been added to the list. In this case we want to just flush the
8395
+ // heredoc end.
8396
+ parser_flush_heredoc_end(parser);
8397
+ } else {
8398
+ // Otherwise, we'll add the newline to the list of newlines.
8399
+ pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8400
+ }
8401
+
8402
+ const uint8_t delimiter = *parser->current.end;
8403
+ parser->current.end += eol_length;
8404
+
8405
+ return delimiter;
8406
+ }
8407
+
8408
+ return *parser->current.end++;
8409
+ }
8410
+
8073
8411
  /**
8074
8412
  * This is a convenience macro that will set the current token type, call the
8075
8413
  * lex callback, and then return from the parser_lex function.
@@ -8635,7 +8973,7 @@ parser_lex(pm_parser_t *parser) {
8635
8973
  // this is not a valid heredoc declaration. In this case we
8636
8974
  // will add an error, but we will still return a heredoc
8637
8975
  // start.
8638
- pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
8976
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
8639
8977
  body_start = parser->end;
8640
8978
  } else {
8641
8979
  // Otherwise, we want to indicate that the body of the
@@ -8826,12 +9164,10 @@ parser_lex(pm_parser_t *parser) {
8826
9164
  LEX(PM_TOKEN_PLUS_EQUAL);
8827
9165
  }
8828
9166
 
8829
- bool spcarg = lex_state_spcarg_p(parser, space_seen);
8830
- if (spcarg) {
8831
- pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
8832
- }
8833
-
8834
- if (lex_state_beg_p(parser) || spcarg) {
9167
+ if (
9168
+ lex_state_beg_p(parser) ||
9169
+ (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
9170
+ ) {
8835
9171
  lex_state_set(parser, PM_LEX_STATE_BEG);
8836
9172
 
8837
9173
  if (pm_char_is_decimal_digit(peek(parser))) {
@@ -8871,11 +9207,12 @@ parser_lex(pm_parser_t *parser) {
8871
9207
  }
8872
9208
 
8873
9209
  bool spcarg = lex_state_spcarg_p(parser, space_seen);
8874
- if (spcarg) {
9210
+ bool is_beg = lex_state_beg_p(parser);
9211
+ if (!is_beg && spcarg) {
8875
9212
  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
8876
9213
  }
8877
9214
 
8878
- if (lex_state_beg_p(parser) || spcarg) {
9215
+ if (is_beg || spcarg) {
8879
9216
  lex_state_set(parser, PM_LEX_STATE_BEG);
8880
9217
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
8881
9218
  }
@@ -9026,15 +9363,8 @@ parser_lex(pm_parser_t *parser) {
9026
9363
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9027
9364
  }
9028
9365
 
9029
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9030
-
9031
- size_t eol_length = match_eol(parser);
9032
- if (eol_length) {
9033
- parser->current.end += eol_length;
9034
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9035
- } else {
9036
- parser->current.end++;
9037
- }
9366
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9367
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9038
9368
 
9039
9369
  if (parser->current.end < parser->end) {
9040
9370
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -9054,7 +9384,7 @@ parser_lex(pm_parser_t *parser) {
9054
9384
  parser->current.end++;
9055
9385
 
9056
9386
  if (parser->current.end < parser->end) {
9057
- lex_mode_push_list(parser, false, *parser->current.end++);
9387
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9058
9388
  } else {
9059
9389
  lex_mode_push_list_eof(parser);
9060
9390
  }
@@ -9065,7 +9395,7 @@ parser_lex(pm_parser_t *parser) {
9065
9395
  parser->current.end++;
9066
9396
 
9067
9397
  if (parser->current.end < parser->end) {
9068
- lex_mode_push_list(parser, true, *parser->current.end++);
9398
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9069
9399
  } else {
9070
9400
  lex_mode_push_list_eof(parser);
9071
9401
  }
@@ -9076,9 +9406,8 @@ parser_lex(pm_parser_t *parser) {
9076
9406
  parser->current.end++;
9077
9407
 
9078
9408
  if (parser->current.end < parser->end) {
9079
- lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9080
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9081
- parser->current.end++;
9409
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9410
+ lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9082
9411
  } else {
9083
9412
  lex_mode_push_regexp(parser, '\0', '\0');
9084
9413
  }
@@ -9089,9 +9418,8 @@ parser_lex(pm_parser_t *parser) {
9089
9418
  parser->current.end++;
9090
9419
 
9091
9420
  if (parser->current.end < parser->end) {
9092
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9093
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9094
- parser->current.end++;
9421
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9422
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9095
9423
  } else {
9096
9424
  lex_mode_push_string_eof(parser);
9097
9425
  }
@@ -9102,9 +9430,8 @@ parser_lex(pm_parser_t *parser) {
9102
9430
  parser->current.end++;
9103
9431
 
9104
9432
  if (parser->current.end < parser->end) {
9105
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9106
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9107
- parser->current.end++;
9433
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9434
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9108
9435
  } else {
9109
9436
  lex_mode_push_string_eof(parser);
9110
9437
  }
@@ -9115,9 +9442,9 @@ parser_lex(pm_parser_t *parser) {
9115
9442
  parser->current.end++;
9116
9443
 
9117
9444
  if (parser->current.end < parser->end) {
9118
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9445
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9446
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9119
9447
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9120
- parser->current.end++;
9121
9448
  } else {
9122
9449
  lex_mode_push_string_eof(parser);
9123
9450
  }
@@ -9128,7 +9455,7 @@ parser_lex(pm_parser_t *parser) {
9128
9455
  parser->current.end++;
9129
9456
 
9130
9457
  if (parser->current.end < parser->end) {
9131
- lex_mode_push_list(parser, false, *parser->current.end++);
9458
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9132
9459
  } else {
9133
9460
  lex_mode_push_list_eof(parser);
9134
9461
  }
@@ -9139,7 +9466,7 @@ parser_lex(pm_parser_t *parser) {
9139
9466
  parser->current.end++;
9140
9467
 
9141
9468
  if (parser->current.end < parser->end) {
9142
- lex_mode_push_list(parser, true, *parser->current.end++);
9469
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9143
9470
  } else {
9144
9471
  lex_mode_push_list_eof(parser);
9145
9472
  }
@@ -9150,8 +9477,8 @@ parser_lex(pm_parser_t *parser) {
9150
9477
  parser->current.end++;
9151
9478
 
9152
9479
  if (parser->current.end < parser->end) {
9153
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9154
- parser->current.end++;
9480
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9481
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9155
9482
  } else {
9156
9483
  lex_mode_push_string_eof(parser);
9157
9484
  }
@@ -9888,15 +10215,22 @@ parser_lex(pm_parser_t *parser) {
9888
10215
  parser->next_start = NULL;
9889
10216
  }
9890
10217
 
9891
- // We'll check if we're at the end of the file. If we are, then we need to
9892
- // return the EOF token.
10218
+ // Now let's grab the information about the identifier off of the
10219
+ // current lex mode.
10220
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10221
+
10222
+ // We'll check if we're at the end of the file. If we are, then we
10223
+ // will add an error (because we weren't able to find the
10224
+ // terminator) but still continue parsing so that content after the
10225
+ // declaration of the heredoc can be parsed.
9893
10226
  if (parser->current.end >= parser->end) {
9894
- LEX(PM_TOKEN_EOF);
10227
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10228
+ parser->next_start = lex_mode->as.heredoc.next_start;
10229
+ parser->heredoc_end = parser->current.end;
10230
+ lex_state_set(parser, PM_LEX_STATE_END);
10231
+ LEX(PM_TOKEN_HEREDOC_END);
9895
10232
  }
9896
10233
 
9897
- // Now let's grab the information about the identifier off of the current
9898
- // lex mode.
9899
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9900
10234
  const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
9901
10235
  size_t ident_length = lex_mode->as.heredoc.ident_length;
9902
10236
 
@@ -10184,8 +10518,8 @@ parser_lex(pm_parser_t *parser) {
10184
10518
  typedef enum {
10185
10519
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
10520
  PM_BINDING_POWER_STATEMENT = 2,
10187
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10521
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
10522
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
10189
10523
  PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
10524
  PM_BINDING_POWER_NOT = 10, // not
10191
10525
  PM_BINDING_POWER_MATCH = 12, // => in
@@ -10239,15 +10573,15 @@ typedef struct {
10239
10573
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10240
10574
 
10241
10575
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10576
+ // rescue
10577
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10578
+
10242
10579
  // if unless until while
10243
10580
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10244
10581
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10245
10582
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10246
10583
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10247
10584
 
10248
- // rescue
10249
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10250
-
10251
10585
  // and or
10252
10586
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10253
10587
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -10866,7 +11200,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10866
11200
  return target;
10867
11201
  }
10868
11202
 
10869
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
11203
+ if (char_is_identifier_start(parser, call->message_loc.start)) {
10870
11204
  // When we get here, we have a method call, because it was
10871
11205
  // previously marked as a method call but now we have an =. This
10872
11206
  // looks like:
@@ -10984,6 +11318,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10984
11318
  static pm_node_t *
10985
11319
  parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10986
11320
  pm_node_t *result = parse_targets(parser, first_target, binding_power);
11321
+ accept1(parser, PM_TOKEN_NEWLINE);
10987
11322
 
10988
11323
  // Ensure that we have either an = or a ) after the targets.
10989
11324
  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -11084,8 +11419,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11084
11419
 
11085
11420
  if (token_begins_expression_p(parser->current.type)) {
11086
11421
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11087
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11088
- pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11422
+ }
11423
+ else {
11424
+ pm_parser_scope_forwarding_keywords_check(parser, &operator);
11089
11425
  }
11090
11426
 
11091
11427
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -11234,13 +11570,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11234
11570
  if (token_begins_expression_p(parser->current.type)) {
11235
11571
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
11572
  } else {
11237
- if (pm_parser_local_depth(parser, &operator) == -1) {
11238
- // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
- pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
- if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
- }
11243
- }
11573
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11574
+ pm_parser_scope_forwarding_block_check(parser, &operator);
11244
11575
  }
11245
11576
 
11246
11577
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11258,10 +11589,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11589
  pm_token_t operator = parser->previous;
11259
11590
 
11260
11591
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11261
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11262
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11263
- }
11264
-
11592
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
11265
11593
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11266
11594
  } else {
11267
11595
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
@@ -11287,15 +11615,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11287
11615
  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11288
11616
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11289
11617
  } else {
11290
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11291
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11292
- }
11618
+ pm_parser_scope_forwarding_all_check(parser, &parser->previous);
11293
11619
  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11294
11620
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11295
11621
  }
11296
11622
 
11297
11623
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
11298
11624
  parse_arguments_append(parser, arguments, argument);
11625
+ arguments->has_forwarding = true;
11299
11626
  parsed_forwarding_arguments = true;
11300
11627
  break;
11301
11628
  }
@@ -11338,6 +11665,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11338
11665
  }
11339
11666
 
11340
11667
  parsed_bare_hash = true;
11668
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
11669
+ // TODO: Could we solve this with binding powers instead?
11670
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
11341
11671
  }
11342
11672
 
11343
11673
  parse_arguments_append(parser, arguments, argument);
@@ -11414,7 +11744,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11414
11744
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11415
11745
  pm_token_t name = parser->previous;
11416
11746
  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11417
- pm_parser_parameter_name_check(parser, &name);
11747
+ if (pm_parser_parameter_name_check(parser, &name)) {
11748
+ pm_node_flag_set_repeated_parameter(value);
11749
+ }
11418
11750
  pm_parser_local_add_token(parser, &name);
11419
11751
  }
11420
11752
 
@@ -11424,7 +11756,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11424
11756
  pm_token_t name = parser->previous;
11425
11757
 
11426
11758
  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11427
- pm_parser_parameter_name_check(parser, &name);
11759
+ if (pm_parser_parameter_name_check(parser, &name)) {
11760
+ pm_node_flag_set_repeated_parameter(param);
11761
+ }
11428
11762
  pm_parser_local_add_token(parser, &name);
11429
11763
  }
11430
11764
 
@@ -11541,19 +11875,20 @@ parse_parameters(
11541
11875
  pm_token_t operator = parser->previous;
11542
11876
  pm_token_t name;
11543
11877
 
11878
+ bool repeated = false;
11544
11879
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11545
11880
  name = parser->previous;
11546
- pm_parser_parameter_name_check(parser, &name);
11881
+ repeated = pm_parser_parameter_name_check(parser, &name);
11547
11882
  pm_parser_local_add_token(parser, &name);
11548
11883
  } else {
11549
11884
  name = not_provided(parser);
11550
-
11551
- if (allows_forwarding_parameters) {
11552
- pm_parser_local_add_token(parser, &operator);
11553
- }
11885
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11554
11886
  }
11555
11887
 
11556
11888
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
11889
+ if (repeated) {
11890
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11891
+ }
11557
11892
  if (params->block == NULL) {
11558
11893
  pm_parameters_node_block_set(params, param);
11559
11894
  } else {
@@ -11572,9 +11907,8 @@ parse_parameters(
11572
11907
  update_parameter_state(parser, &parser->current, &order);
11573
11908
  parser_lex(parser);
11574
11909
 
11575
- if (allows_forwarding_parameters) {
11576
- pm_parser_local_add_token(parser, &parser->previous);
11577
- }
11910
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11911
+ parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
11578
11912
 
11579
11913
  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
11580
11914
  if (params->keyword_rest != NULL) {
@@ -11626,20 +11960,23 @@ parse_parameters(
11626
11960
  }
11627
11961
 
11628
11962
  pm_token_t name = parser->previous;
11629
- pm_parser_parameter_name_check(parser, &name);
11963
+ bool repeated = pm_parser_parameter_name_check(parser, &name);
11630
11964
  pm_parser_local_add_token(parser, &name);
11631
11965
 
11632
11966
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11633
11967
  pm_token_t operator = parser->previous;
11634
11968
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11635
- pm_constant_id_t old_param_name = parser->current_param_name;
11636
- parser->current_param_name = pm_parser_constant_id_token(parser, &name);
11969
+
11970
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
11637
11971
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11638
11972
 
11639
11973
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11974
+ if (repeated) {
11975
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11976
+ }
11640
11977
  pm_parameters_node_optionals_append(params, param);
11641
11978
 
11642
- parser->current_param_name = old_param_name;
11979
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11643
11980
  context_pop(parser);
11644
11981
 
11645
11982
  // If parsing the value of the parameter resulted in error recovery,
@@ -11651,9 +11988,15 @@ parse_parameters(
11651
11988
  }
11652
11989
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11653
11990
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11991
+ if (repeated) {
11992
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11993
+ }
11654
11994
  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
11655
11995
  } else {
11656
11996
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11997
+ if (repeated) {
11998
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11999
+ }
11657
12000
  pm_parameters_node_posts_append(params, (pm_node_t *) param);
11658
12001
  }
11659
12002
 
@@ -11668,7 +12011,7 @@ parse_parameters(
11668
12011
  pm_token_t local = name;
11669
12012
  local.end -= 1;
11670
12013
 
11671
- pm_parser_parameter_name_check(parser, &local);
12014
+ bool repeated = pm_parser_parameter_name_check(parser, &local);
11672
12015
  pm_parser_local_add_token(parser, &local);
11673
12016
 
11674
12017
  switch (parser->current.type) {
@@ -11676,6 +12019,9 @@ parse_parameters(
11676
12019
  case PM_TOKEN_PARENTHESIS_RIGHT:
11677
12020
  case PM_TOKEN_PIPE: {
11678
12021
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12022
+ if (repeated) {
12023
+ pm_node_flag_set_repeated_parameter(param);
12024
+ }
11679
12025
  pm_parameters_node_keywords_append(params, param);
11680
12026
  break;
11681
12027
  }
@@ -11687,6 +12033,9 @@ parse_parameters(
11687
12033
  }
11688
12034
 
11689
12035
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12036
+ if (repeated) {
12037
+ pm_node_flag_set_repeated_parameter(param);
12038
+ }
11690
12039
  pm_parameters_node_keywords_append(params, param);
11691
12040
  break;
11692
12041
  }
@@ -11695,17 +12044,22 @@ parse_parameters(
11695
12044
 
11696
12045
  if (token_begins_expression_p(parser->current.type)) {
11697
12046
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11698
- pm_constant_id_t old_param_name = parser->current_param_name;
11699
- parser->current_param_name = pm_parser_constant_id_token(parser, &local);
12047
+
12048
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
11700
12049
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
- parser->current_param_name = old_param_name;
12050
+
12051
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11702
12052
  context_pop(parser);
12053
+
11703
12054
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11704
12055
  }
11705
12056
  else {
11706
12057
  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11707
12058
  }
11708
12059
 
12060
+ if (repeated) {
12061
+ pm_node_flag_set_repeated_parameter(param);
12062
+ }
11709
12063
  pm_parameters_node_keywords_append(params, param);
11710
12064
 
11711
12065
  // If parsing the value of the parameter resulted in error recovery,
@@ -11728,20 +12082,21 @@ parse_parameters(
11728
12082
 
11729
12083
  pm_token_t operator = parser->previous;
11730
12084
  pm_token_t name;
11731
-
12085
+ bool repeated = false;
11732
12086
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11733
12087
  name = parser->previous;
11734
- pm_parser_parameter_name_check(parser, &name);
12088
+ repeated = pm_parser_parameter_name_check(parser, &name);
11735
12089
  pm_parser_local_add_token(parser, &name);
11736
12090
  } else {
11737
12091
  name = not_provided(parser);
11738
12092
 
11739
- if (allows_forwarding_parameters) {
11740
- pm_parser_local_add_token(parser, &operator);
11741
- }
12093
+ parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
11742
12094
  }
11743
12095
 
11744
12096
  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
12097
+ if (repeated) {
12098
+ pm_node_flag_set_repeated_parameter(param);
12099
+ }
11745
12100
  if (params->rest == NULL) {
11746
12101
  pm_parameters_node_rest_set(params, param);
11747
12102
  } else {
@@ -11764,19 +12119,21 @@ parse_parameters(
11764
12119
  } else {
11765
12120
  pm_token_t name;
11766
12121
 
12122
+ bool repeated = false;
11767
12123
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11768
12124
  name = parser->previous;
11769
- pm_parser_parameter_name_check(parser, &name);
12125
+ repeated = pm_parser_parameter_name_check(parser, &name);
11770
12126
  pm_parser_local_add_token(parser, &name);
11771
12127
  } else {
11772
12128
  name = not_provided(parser);
11773
12129
 
11774
- if (allows_forwarding_parameters) {
11775
- pm_parser_local_add_token(parser, &operator);
11776
- }
12130
+ parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
11777
12131
  }
11778
12132
 
11779
12133
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
12134
+ if (repeated) {
12135
+ pm_node_flag_set_repeated_parameter(param);
12136
+ }
11780
12137
  }
11781
12138
 
11782
12139
  if (params->keyword_rest == NULL) {
@@ -12012,10 +12369,13 @@ parse_block_parameters(
12012
12369
  if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
12013
12370
  do {
12014
12371
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
12015
- pm_parser_parameter_name_check(parser, &parser->previous);
12372
+ bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
12016
12373
  pm_parser_local_add_token(parser, &parser->previous);
12017
12374
 
12018
12375
  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
12376
+ if (repeated) {
12377
+ pm_node_flag_set_repeated_parameter((pm_node_t *)local);
12378
+ }
12019
12379
  pm_block_parameters_node_append_local(block_parameters, local);
12020
12380
  } while (accept1(parser, PM_TOKEN_COMMA));
12021
12381
  }
@@ -12031,8 +12391,10 @@ parse_block(pm_parser_t *parser) {
12031
12391
  pm_token_t opening = parser->previous;
12032
12392
  accept1(parser, PM_TOKEN_NEWLINE);
12033
12393
 
12394
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
12034
12395
  pm_accepts_block_stack_push(parser, true);
12035
12396
  pm_parser_scope_push(parser, false);
12397
+
12036
12398
  pm_block_parameters_node_t *block_parameters = NULL;
12037
12399
 
12038
12400
  if (accept1(parser, PM_TOKEN_PIPE)) {
@@ -12053,12 +12415,6 @@ parse_block(pm_parser_t *parser) {
12053
12415
  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
12416
  }
12055
12417
 
12056
- uint32_t locals_body_index = 0;
12057
-
12058
- if (block_parameters) {
12059
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
12060
- }
12061
-
12062
12418
  accept1(parser, PM_TOKEN_NEWLINE);
12063
12419
  pm_node_t *statements = NULL;
12064
12420
 
@@ -12090,13 +12446,14 @@ parse_block(pm_parser_t *parser) {
12090
12446
 
12091
12447
  if (parameters == NULL && (maximum > 0)) {
12092
12448
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
- locals_body_index = maximum;
12094
12449
  }
12095
12450
 
12096
12451
  pm_constant_id_list_t locals = parser->current_scope->locals;
12097
12452
  pm_parser_scope_pop(parser);
12098
12453
  pm_accepts_block_stack_pop(parser);
12099
- return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
12454
+ pm_parser_current_param_name_restore(parser, saved_param_name);
12455
+
12456
+ return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12100
12457
  }
12101
12458
 
12102
12459
  /**
@@ -12157,14 +12514,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12157
12514
  }
12158
12515
 
12159
12516
  if (block != NULL) {
12160
- if (arguments->block == NULL) {
12517
+ if (arguments->block == NULL && !arguments->has_forwarding) {
12161
12518
  arguments->block = (pm_node_t *) block;
12162
12519
  } else {
12163
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12164
- if (arguments->arguments == NULL) {
12165
- arguments->arguments = pm_arguments_node_create(parser);
12520
+ if (arguments->has_forwarding) {
12521
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
12522
+ } else {
12523
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12524
+ }
12525
+ if (arguments->block != NULL) {
12526
+ if (arguments->arguments == NULL) {
12527
+ arguments->arguments = pm_arguments_node_create(parser);
12528
+ }
12529
+ pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12166
12530
  }
12167
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12168
12531
  arguments->block = (pm_node_t *) block;
12169
12532
  }
12170
12533
  }
@@ -12384,8 +12747,14 @@ static inline pm_node_flags_t
12384
12747
  parse_unescaped_encoding(const pm_parser_t *parser) {
12385
12748
  if (parser->explicit_encoding != NULL) {
12386
12749
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12750
+ // If the there's an explicit encoding and it's using a UTF-8 escape
12751
+ // sequence, then mark the string as UTF-8.
12387
12752
  return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
12753
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12754
+ // If there's a non-UTF-8 escape sequence being used, then the
12755
+ // string uses the source encoding, unless the source is marked as
12756
+ // US-ASCII. In that case the string is forced as ASCII-8BIT in
12757
+ // order to keep the string valid.
12389
12758
  return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
12759
  }
12391
12760
  }
@@ -12509,14 +12878,54 @@ parse_string_part(pm_parser_t *parser) {
12509
12878
  }
12510
12879
  }
12511
12880
 
12881
+ /**
12882
+ * When creating a symbol, unary operators that cannot be binary operators
12883
+ * automatically drop trailing `@` characters. This happens at the parser level,
12884
+ * such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
12885
+ */
12886
+ static const uint8_t *
12887
+ parse_operator_symbol_name(const pm_token_t *name) {
12888
+ switch (name->type) {
12889
+ case PM_TOKEN_TILDE:
12890
+ case PM_TOKEN_BANG:
12891
+ if (name->end[-1] == '@') return name->end - 1;
12892
+ /* fallthrough */
12893
+ default:
12894
+ return name->end;
12895
+ }
12896
+ }
12897
+
12898
+ static pm_node_t *
12899
+ parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
12900
+ pm_token_t closing = not_provided(parser);
12901
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
12902
+
12903
+ const uint8_t *end = parse_operator_symbol_name(&parser->current);
12904
+
12905
+ if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12906
+ parser_lex(parser);
12907
+
12908
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
12909
+ pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
12910
+
12911
+ return (pm_node_t *) symbol;
12912
+ }
12913
+
12914
+ /**
12915
+ * Parse a symbol node. This function will get called immediately after finding
12916
+ * a symbol opening token. This handles parsing bare symbols and interpolated
12917
+ * symbols.
12918
+ */
12512
12919
  static pm_node_t *
12513
12920
  parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
12514
- pm_token_t opening = parser->previous;
12921
+ const pm_token_t opening = parser->previous;
12515
12922
 
12516
12923
  if (lex_mode->mode != PM_LEX_STRING) {
12517
12924
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12518
12925
 
12519
12926
  switch (parser->current.type) {
12927
+ case PM_CASE_OPERATOR:
12928
+ return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12520
12929
  case PM_TOKEN_IDENTIFIER:
12521
12930
  case PM_TOKEN_CONSTANT:
12522
12931
  case PM_TOKEN_INSTANCE_VARIABLE:
@@ -12528,10 +12937,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12528
12937
  case PM_CASE_KEYWORD:
12529
12938
  parser_lex(parser);
12530
12939
  break;
12531
- case PM_CASE_OPERATOR:
12532
- lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12533
- parser_lex(parser);
12534
- break;
12535
12940
  default:
12536
12941
  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
12537
12942
  break;
@@ -12541,6 +12946,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12541
12946
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12542
12947
 
12543
12948
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
12949
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
12950
+
12544
12951
  return (pm_node_t *) symbol;
12545
12952
  }
12546
12953
 
@@ -12637,7 +13044,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12637
13044
  } else {
12638
13045
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
13046
  }
12640
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13047
+
13048
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
12641
13049
  }
12642
13050
 
12643
13051
  /**
@@ -12647,8 +13055,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12647
13055
  static inline pm_node_t *
12648
13056
  parse_undef_argument(pm_parser_t *parser) {
12649
13057
  switch (parser->current.type) {
13058
+ case PM_CASE_OPERATOR: {
13059
+ const pm_token_t opening = not_provided(parser);
13060
+ return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
13061
+ }
12650
13062
  case PM_CASE_KEYWORD:
12651
- case PM_CASE_OPERATOR:
12652
13063
  case PM_TOKEN_CONSTANT:
12653
13064
  case PM_TOKEN_IDENTIFIER:
12654
13065
  case PM_TOKEN_METHOD_NAME: {
@@ -12659,6 +13070,8 @@ parse_undef_argument(pm_parser_t *parser) {
12659
13070
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12660
13071
 
12661
13072
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13073
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13074
+
12662
13075
  return (pm_node_t *) symbol;
12663
13076
  }
12664
13077
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12682,21 +13095,24 @@ parse_undef_argument(pm_parser_t *parser) {
12682
13095
  static inline pm_node_t *
12683
13096
  parse_alias_argument(pm_parser_t *parser, bool first) {
12684
13097
  switch (parser->current.type) {
12685
- case PM_CASE_OPERATOR:
13098
+ case PM_CASE_OPERATOR: {
13099
+ const pm_token_t opening = not_provided(parser);
13100
+ return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
13101
+ }
12686
13102
  case PM_CASE_KEYWORD:
12687
13103
  case PM_TOKEN_CONSTANT:
12688
13104
  case PM_TOKEN_IDENTIFIER:
12689
13105
  case PM_TOKEN_METHOD_NAME: {
12690
- if (first) {
12691
- lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12692
- }
12693
-
13106
+ if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12694
13107
  parser_lex(parser);
13108
+
12695
13109
  pm_token_t opening = not_provided(parser);
12696
13110
  pm_token_t closing = not_provided(parser);
12697
13111
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12698
13112
 
12699
13113
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13114
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13115
+
12700
13116
  return (pm_node_t *) symbol;
12701
13117
  }
12702
13118
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12733,6 +13149,65 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12733
13149
  return false;
12734
13150
  }
12735
13151
 
13152
+ /**
13153
+ * Parse an identifier into either a local variable read. If the local variable
13154
+ * is not found, it returns NULL instead.
13155
+ */
13156
+ static pm_local_variable_read_node_t *
13157
+ parse_variable(pm_parser_t *parser) {
13158
+ int depth;
13159
+ if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
13160
+ return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
13161
+ }
13162
+
13163
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
13164
+ // Now that we know we have a numbered parameter, we need to check
13165
+ // if it's allowed in this context. If it is, then we will create a
13166
+ // local variable read. If it's not, then we'll create a normal call
13167
+ // node but add an error.
13168
+ if (parser->current_scope->explicit_params) {
13169
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
13170
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
13171
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13172
+ } else {
13173
+ // Indicate that this scope is using numbered params so that child
13174
+ // scopes cannot.
13175
+ uint8_t number = parser->previous.start[1];
13176
+
13177
+ // We subtract the value for the character '0' to get the actual
13178
+ // integer value of the number (only _1 through _9 are valid)
13179
+ uint8_t numbered_parameters = (uint8_t) (number - '0');
13180
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
13181
+ parser->current_scope->numbered_parameters = numbered_parameters;
13182
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
13183
+ }
13184
+
13185
+ // When you use a numbered parameter, it implies the existence
13186
+ // of all of the locals that exist before it. For example,
13187
+ // referencing _2 means that _1 must exist. Therefore here we
13188
+ // loop through all of the possibilities and add them into the
13189
+ // constant pool.
13190
+ uint8_t current = '1';
13191
+ uint8_t *value;
13192
+
13193
+ while (current < number) {
13194
+ value = malloc(2);
13195
+ value[0] = '_';
13196
+ value[1] = current++;
13197
+ pm_parser_local_add_owned(parser, value, 2);
13198
+ }
13199
+
13200
+ // Now we can add the actual token that is being used. For
13201
+ // this one we can add a shared version since it is directly
13202
+ // referenced in the source.
13203
+ pm_parser_local_add_token(parser, &parser->previous);
13204
+ return pm_local_variable_read_node_create(parser, &parser->previous, 0);
13205
+ }
13206
+ }
13207
+
13208
+ return NULL;
13209
+ }
13210
+
12736
13211
  /**
12737
13212
  * Parse an identifier into either a local variable read or a call.
12738
13213
  */
@@ -12741,56 +13216,8 @@ parse_variable_call(pm_parser_t *parser) {
12741
13216
  pm_node_flags_t flags = 0;
12742
13217
 
12743
13218
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
12744
- int depth;
12745
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
12746
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12747
- }
12748
-
12749
- if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12750
- // Now that we know we have a numbered parameter, we need to check
12751
- // if it's allowed in this context. If it is, then we will create a
12752
- // local variable read. If it's not, then we'll create a normal call
12753
- // node but add an error.
12754
- if (parser->current_scope->explicit_params) {
12755
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12756
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
12757
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12758
- } else {
12759
- // Indicate that this scope is using numbered params so that child
12760
- // scopes cannot.
12761
- uint8_t number = parser->previous.start[1];
12762
-
12763
- // We subtract the value for the character '0' to get the actual
12764
- // integer value of the number (only _1 through _9 are valid)
12765
- uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
- if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
- parser->current_scope->numbered_parameters = numbered_parameters;
12768
- pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
- }
12770
-
12771
- // When you use a numbered parameter, it implies the existence
12772
- // of all of the locals that exist before it. For example,
12773
- // referencing _2 means that _1 must exist. Therefore here we
12774
- // loop through all of the possibilities and add them into the
12775
- // constant pool.
12776
- uint8_t current = '1';
12777
- uint8_t *value;
12778
-
12779
- while (current < number) {
12780
- value = malloc(2);
12781
- value[0] = '_';
12782
- value[1] = current++;
12783
- pm_parser_local_add_owned(parser, value, 2);
12784
- }
12785
-
12786
- // Now we can add the actual token that is being used. For
12787
- // this one we can add a shared version since it is directly
12788
- // referenced in the source.
12789
- pm_parser_local_add_token(parser, &parser->previous);
12790
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
12791
- }
12792
- }
12793
-
13219
+ pm_local_variable_read_node_t *node = parse_variable(parser);
13220
+ if (node != NULL) return (pm_node_t *) node;
12794
13221
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
12795
13222
  }
12796
13223
 
@@ -13076,43 +13503,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
13076
13503
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
13077
13504
  }
13078
13505
 
13506
+ /**
13507
+ * Create an implicit node for the value of a hash pattern that has omitted the
13508
+ * value. This will use an implicit local variable target.
13509
+ */
13510
+ static pm_node_t *
13511
+ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
13512
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13513
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
13514
+
13515
+ int current_depth = pm_parser_local_depth_constant_id(parser, name);
13516
+ uint32_t depth;
13517
+
13518
+ if (current_depth == -1) {
13519
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13520
+ depth = 0;
13521
+ } else {
13522
+ depth = (uint32_t) current_depth;
13523
+ }
13524
+
13525
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
13526
+ return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
13527
+ }
13528
+
13079
13529
  /**
13080
13530
  * Parse a hash pattern.
13081
13531
  */
13082
13532
  static pm_hash_pattern_node_t *
13083
- parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13533
+ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
13084
13534
  pm_node_list_t assocs = { 0 };
13085
13535
  pm_node_t *rest = NULL;
13086
13536
 
13087
- switch (PM_NODE_TYPE(first_assoc)) {
13088
- case PM_ASSOC_NODE: {
13089
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13090
- // Here we have a value for the first assoc in the list, so we will
13091
- // parse it now and update the first assoc.
13092
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13537
+ switch (PM_NODE_TYPE(first_node)) {
13538
+ case PM_ASSOC_SPLAT_NODE:
13539
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
13540
+ rest = first_node;
13541
+ break;
13542
+ case PM_SYMBOL_NODE: {
13543
+ if (pm_symbol_node_label_p(first_node)) {
13544
+ pm_node_t *value;
13545
+
13546
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13547
+ // Here we have a value for the first assoc in the list, so
13548
+ // we will parse it now.
13549
+ value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13550
+ } else {
13551
+ // Otherwise, we will create an implicit local variable
13552
+ // target for the value.
13553
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
13554
+ }
13093
13555
 
13094
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
13095
- assoc->base.location.end = value->location.end;
13096
- assoc->value = value;
13097
- } else {
13098
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
13556
+ pm_token_t operator = not_provided(parser);
13557
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13099
13558
 
13100
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
13101
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13102
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13103
- }
13559
+ pm_node_list_append(&assocs, assoc);
13560
+ break;
13104
13561
  }
13562
+ }
13563
+ /* fallthrough */
13564
+ default: {
13565
+ // If we get anything else, then this is an error. For this we'll
13566
+ // create a missing node for the value and create an assoc node for
13567
+ // the first node in the list.
13568
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
13105
13569
 
13106
- pm_node_list_append(&assocs, first_assoc);
13570
+ pm_token_t operator = not_provided(parser);
13571
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
13572
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13573
+
13574
+ pm_node_list_append(&assocs, assoc);
13107
13575
  break;
13108
13576
  }
13109
- case PM_ASSOC_SPLAT_NODE:
13110
- case PM_NO_KEYWORDS_PARAMETER_NODE:
13111
- rest = first_assoc;
13112
- break;
13113
- default:
13114
- assert(false);
13115
- break;
13116
13577
  }
13117
13578
 
13118
13579
  // If there are any other assocs, then we'll parse them now.
@@ -13141,6 +13602,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13141
13602
  } else {
13142
13603
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13143
13604
  pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13605
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
13144
13606
  }
13145
13607
 
13146
13608
  pm_token_t operator = not_provided(parser);
@@ -13246,45 +13708,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13246
13708
  // pattern node.
13247
13709
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
13248
13710
  } else {
13249
- pm_node_t *first_assoc;
13711
+ pm_node_t *first_node;
13250
13712
 
13251
13713
  switch (parser->current.type) {
13252
- case PM_TOKEN_LABEL: {
13714
+ case PM_TOKEN_LABEL:
13253
13715
  parser_lex(parser);
13254
-
13255
- pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
13256
- pm_token_t operator = not_provided(parser);
13257
-
13258
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13716
+ first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13259
13717
  break;
13260
- }
13261
13718
  case PM_TOKEN_USTAR_STAR:
13262
- first_assoc = parse_pattern_keyword_rest(parser);
13719
+ first_node = parse_pattern_keyword_rest(parser);
13263
13720
  break;
13264
- case PM_TOKEN_STRING_BEGIN: {
13265
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13266
- pm_token_t operator = not_provided(parser);
13267
-
13268
- if (!pm_symbol_node_label_p(key)) {
13269
- pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
13270
- }
13271
-
13272
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
13721
+ case PM_TOKEN_STRING_BEGIN:
13722
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13273
13723
  break;
13274
- }
13275
13724
  default: {
13276
13725
  parser_lex(parser);
13277
13726
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
13278
13727
 
13279
- pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13280
- pm_token_t operator = not_provided(parser);
13281
-
13282
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13728
+ first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13283
13729
  break;
13284
13730
  }
13285
13731
  }
13286
13732
 
13287
- node = parse_pattern_hash(parser, first_assoc);
13733
+ node = parse_pattern_hash(parser, first_node);
13288
13734
 
13289
13735
  accept1(parser, PM_TOKEN_NEWLINE);
13290
13736
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -13350,7 +13796,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13350
13796
  switch (parser->current.type) {
13351
13797
  case PM_TOKEN_IDENTIFIER: {
13352
13798
  parser_lex(parser);
13353
- pm_node_t *variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13799
+ pm_node_t *variable = (pm_node_t *) parse_variable(parser);
13800
+ if (variable == NULL) {
13801
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
13802
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13803
+ variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13804
+ } else {
13805
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13806
+ variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13807
+ }
13808
+ }
13354
13809
 
13355
13810
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13356
13811
  }
@@ -13519,9 +13974,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13519
13974
  case PM_TOKEN_LABEL: {
13520
13975
  parser_lex(parser);
13521
13976
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13522
- pm_token_t operator = not_provided(parser);
13523
-
13524
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
13977
+ return (pm_node_t *) parse_pattern_hash(parser, key);
13525
13978
  }
13526
13979
  case PM_TOKEN_USTAR_STAR: {
13527
13980
  node = parse_pattern_keyword_rest(parser);
@@ -13544,8 +13997,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13544
13997
  // If we got a dynamic label symbol, then we need to treat it like the
13545
13998
  // beginning of a hash pattern.
13546
13999
  if (pm_symbol_node_label_p(node)) {
13547
- pm_token_t operator = not_provided(parser);
13548
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
14000
+ return (pm_node_t *) parse_pattern_hash(parser, node);
13549
14001
  }
13550
14002
 
13551
14003
  if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
@@ -13644,7 +14096,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13644
14096
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13645
14097
 
13646
14098
  bool concating = false;
13647
- bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
14099
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
13648
14100
 
13649
14101
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13650
14102
  pm_node_t *node = NULL;
@@ -13719,7 +14171,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13719
14171
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13720
14172
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13721
14173
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13722
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14174
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13723
14175
  } else if (match1(parser, PM_TOKEN_EOF)) {
13724
14176
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
14177
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -13741,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13741
14193
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
14194
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13743
14195
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13744
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14196
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13745
14197
  } else {
13746
14198
  // If we get here, then we have interpolation so we'll need
13747
14199
  // to create a string or symbol node with interpolation.
@@ -13834,7 +14286,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13834
14286
  * Parse an expression that begins with the previous node that we just lexed.
13835
14287
  */
13836
14288
  static inline pm_node_t *
13837
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
14289
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
13838
14290
  switch (parser->current.type) {
13839
14291
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13840
14292
  parser_lex(parser);
@@ -13866,9 +14318,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
13866
14318
  pm_node_t *expression = NULL;
13867
14319
 
13868
14320
  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
13869
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
13870
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13871
- }
14321
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
13872
14322
  } else {
13873
14323
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13874
14324
  }
@@ -14113,7 +14563,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14113
14563
  if (
14114
14564
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
14115
14565
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14116
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14566
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14567
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14117
14568
  ) {
14118
14569
  pm_arguments_t arguments = { 0 };
14119
14570
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14237,7 +14688,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14237
14688
  // a block, so we need to check for that here.
14238
14689
  if (
14239
14690
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14240
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14691
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14692
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14241
14693
  ) {
14242
14694
  pm_arguments_t arguments = { 0 };
14243
14695
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14250,6 +14702,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14250
14702
 
14251
14703
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14252
14704
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14705
+ } else {
14706
+ // Check if `it` is not going to be assigned.
14707
+ switch (parser->current.type) {
14708
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14709
+ case PM_TOKEN_AMPERSAND_EQUAL:
14710
+ case PM_TOKEN_CARET_EQUAL:
14711
+ case PM_TOKEN_EQUAL:
14712
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
14713
+ case PM_TOKEN_LESS_LESS_EQUAL:
14714
+ case PM_TOKEN_MINUS_EQUAL:
14715
+ case PM_TOKEN_PARENTHESIS_RIGHT:
14716
+ case PM_TOKEN_PERCENT_EQUAL:
14717
+ case PM_TOKEN_PIPE_EQUAL:
14718
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
14719
+ case PM_TOKEN_PLUS_EQUAL:
14720
+ case PM_TOKEN_SLASH_EQUAL:
14721
+ case PM_TOKEN_STAR_EQUAL:
14722
+ case PM_TOKEN_STAR_STAR_EQUAL:
14723
+ break;
14724
+ default:
14725
+ // Once we know it's neither a method call nor an
14726
+ // assignment, we can finally create `it` default
14727
+ // parameter.
14728
+ node = pm_node_check_it(parser, node);
14729
+ }
14253
14730
  }
14254
14731
 
14255
14732
  return node;
@@ -14286,6 +14763,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14286
14763
  // If we get here, then we tried to find something in the
14287
14764
  // heredoc but couldn't actually parse anything, so we'll just
14288
14765
  // return a missing node.
14766
+ //
14767
+ // parse_string_part handles its own errors, so there is no need
14768
+ // for us to add one here.
14289
14769
  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14290
14770
  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14291
14771
  // If we get here, then the part that we parsed was plain string
@@ -14549,11 +15029,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14549
15029
  // for guard clauses in the form of `if` or `unless` statements.
14550
15030
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14551
15031
  pm_token_t keyword = parser->previous;
14552
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
15032
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14553
15033
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14554
15034
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14555
15035
  pm_token_t keyword = parser->previous;
14556
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15036
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14557
15037
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14558
15038
  }
14559
15039
 
@@ -14742,8 +15222,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14742
15222
  pm_token_t operator = parser->previous;
14743
15223
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14744
15224
 
14745
- pm_constant_id_t old_param_name = parser->current_param_name;
14746
- parser->current_param_name = 0;
15225
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14747
15226
  pm_parser_scope_push(parser, true);
14748
15227
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14749
15228
 
@@ -14760,11 +15239,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14760
15239
  }
14761
15240
 
14762
15241
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14763
-
14764
15242
  pm_constant_id_list_t locals = parser->current_scope->locals;
15243
+
14765
15244
  pm_parser_scope_pop(parser);
14766
- parser->current_param_name = old_param_name;
14767
15245
  pm_do_loop_stack_pop(parser);
15246
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15247
+
14768
15248
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14769
15249
  }
14770
15250
 
@@ -14790,9 +15270,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14790
15270
  superclass = NULL;
14791
15271
  }
14792
15272
 
14793
- pm_constant_id_t old_param_name = parser->current_param_name;
14794
- parser->current_param_name = 0;
15273
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14795
15274
  pm_parser_scope_push(parser, true);
15275
+
14796
15276
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14797
15277
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
14798
15278
  } else {
@@ -14818,9 +15298,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14818
15298
  }
14819
15299
 
14820
15300
  pm_constant_id_list_t locals = parser->current_scope->locals;
15301
+
14821
15302
  pm_parser_scope_pop(parser);
14822
- parser->current_param_name = old_param_name;
14823
15303
  pm_do_loop_stack_pop(parser);
15304
+ pm_parser_current_param_name_restore(parser, saved_param_name);
14824
15305
 
14825
15306
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
14826
15307
  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
@@ -14835,18 +15316,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14835
15316
  pm_token_t operator = not_provided(parser);
14836
15317
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14837
15318
 
14838
- // This context is necessary for lexing `...` in a bare params correctly.
14839
- // It must be pushed before lexing the first param, so it is here.
15319
+ // This context is necessary for lexing `...` in a bare params
15320
+ // correctly. It must be pushed before lexing the first param, so it
15321
+ // is here.
14840
15322
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
15323
+ pm_constant_id_t saved_param_name;
15324
+
14841
15325
  parser_lex(parser);
14842
- pm_constant_id_t old_param_name = parser->current_param_name;
14843
15326
 
14844
15327
  switch (parser->current.type) {
14845
15328
  case PM_CASE_OPERATOR:
15329
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14846
15330
  pm_parser_scope_push(parser, true);
14847
- parser->current_param_name = 0;
14848
15331
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14849
15332
  parser_lex(parser);
15333
+
14850
15334
  name = parser->previous;
14851
15335
  break;
14852
15336
  case PM_TOKEN_IDENTIFIER: {
@@ -14854,18 +15338,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14854
15338
 
14855
15339
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14856
15340
  receiver = parse_variable_call(parser);
15341
+ receiver = pm_node_check_it(parser, receiver);
14857
15342
 
15343
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14858
15344
  pm_parser_scope_push(parser, true);
14859
- parser->current_param_name = 0;
14860
15345
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14861
15346
  parser_lex(parser);
14862
15347
 
14863
15348
  operator = parser->previous;
14864
15349
  name = parse_method_definition_name(parser);
14865
15350
  } else {
15351
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14866
15352
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14867
15353
  pm_parser_scope_push(parser, true);
14868
- parser->current_param_name = 0;
15354
+
14869
15355
  name = parser->previous;
14870
15356
  }
14871
15357
 
@@ -14882,9 +15368,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14882
15368
  case PM_TOKEN_KEYWORD___FILE__:
14883
15369
  case PM_TOKEN_KEYWORD___LINE__:
14884
15370
  case PM_TOKEN_KEYWORD___ENCODING__: {
15371
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14885
15372
  pm_parser_scope_push(parser, true);
14886
- parser->current_param_name = 0;
14887
15373
  parser_lex(parser);
15374
+
14888
15375
  pm_token_t identifier = parser->previous;
14889
15376
 
14890
15377
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
@@ -14946,6 +15433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14946
15433
  pm_token_t lparen = parser->previous;
14947
15434
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14948
15435
 
15436
+ accept1(parser, PM_TOKEN_NEWLINE);
14949
15437
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14950
15438
  pm_token_t rparen = parser->previous;
14951
15439
 
@@ -14955,8 +15443,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14955
15443
  operator = parser->previous;
14956
15444
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14957
15445
 
15446
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14958
15447
  pm_parser_scope_push(parser, true);
14959
- parser->current_param_name = 0;
14960
15448
 
14961
15449
  // To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
14962
15450
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
@@ -14964,8 +15452,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14964
15452
  break;
14965
15453
  }
14966
15454
  default:
15455
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14967
15456
  pm_parser_scope_push(parser, true);
14968
- parser->current_param_name = 0;
15457
+
14969
15458
  name = parse_method_definition_name(parser);
14970
15459
  break;
14971
15460
  }
@@ -15018,8 +15507,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15018
15507
  }
15019
15508
  }
15020
15509
 
15021
- uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
-
15023
15510
  context_pop(parser);
15024
15511
  pm_node_t *statements = NULL;
15025
15512
  pm_token_t equal;
@@ -15080,8 +15567,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15080
15567
  }
15081
15568
 
15082
15569
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
- parser->current_param_name = old_param_name;
15570
+
15084
15571
  pm_parser_scope_pop(parser);
15572
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15573
+
15574
+ /**
15575
+ * If the final character is @. As is the case when defining
15576
+ * methods to override the unary operators, we should ignore
15577
+ * the @ in the same way we do for symbols.
15578
+ */
15579
+ name.end = parse_operator_symbol_name(&name);
15085
15580
 
15086
15581
  return (pm_node_t *) pm_def_node_create(
15087
15582
  parser,
@@ -15090,7 +15585,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15090
15585
  params,
15091
15586
  statements,
15092
15587
  &locals,
15093
- locals_body_index,
15094
15588
  &def_keyword,
15095
15589
  &operator,
15096
15590
  &lparen,
@@ -15309,9 +15803,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15309
15803
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15310
15804
  }
15311
15805
 
15312
- pm_constant_id_t old_param_name = parser->current_param_name;
15313
- parser->current_param_name = 0;
15806
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15314
15807
  pm_parser_scope_push(parser, true);
15808
+
15315
15809
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15316
15810
  pm_node_t *statements = NULL;
15317
15811
 
@@ -15328,7 +15822,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15328
15822
 
15329
15823
  pm_constant_id_list_t locals = parser->current_scope->locals;
15330
15824
  pm_parser_scope_pop(parser);
15331
- parser->current_param_name = old_param_name;
15825
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15332
15826
 
15333
15827
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15334
15828
 
@@ -15914,6 +16408,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15914
16408
  // context of a multiple assignment. We enforce that here. We'll
15915
16409
  // still lex past it though and create a missing node place.
15916
16410
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16411
+ pm_parser_err_previous(parser, diag_id);
15917
16412
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15918
16413
  }
15919
16414
 
@@ -15995,7 +16490,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15995
16490
  parser_lex(parser);
15996
16491
 
15997
16492
  pm_token_t operator = parser->previous;
16493
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15998
16494
  pm_parser_scope_push(parser, false);
16495
+
15999
16496
  pm_block_parameters_node_t *block_parameters;
16000
16497
 
16001
16498
  switch (parser->current.type) {
@@ -16030,12 +16527,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16030
16527
  }
16031
16528
  }
16032
16529
 
16033
- uint32_t locals_body_index = 0;
16034
-
16035
- if (block_parameters) {
16036
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
- }
16038
-
16039
16530
  pm_token_t opening;
16040
16531
  pm_node_t *body = NULL;
16041
16532
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -16070,13 +16561,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16070
16561
 
16071
16562
  if (parameters == NULL && (maximum > 0)) {
16072
16563
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
- locals_body_index = maximum;
16074
16564
  }
16075
16565
 
16076
16566
  pm_constant_id_list_t locals = parser->current_scope->locals;
16567
+
16077
16568
  pm_parser_scope_pop(parser);
16078
16569
  pm_accepts_block_stack_pop(parser);
16079
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
16570
+ pm_parser_current_param_name_restore(parser, saved_param_name);
16571
+
16572
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
16080
16573
  }
16081
16574
  case PM_TOKEN_UPLUS: {
16082
16575
  parser_lex(parser);
@@ -16095,12 +16588,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16095
16588
 
16096
16589
  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16097
16590
  }
16098
- default:
16099
- if (context_recoverable(parser, &parser->current)) {
16591
+ default: {
16592
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
16593
+
16594
+ if (recoverable != PM_CONTEXT_NONE) {
16100
16595
  parser->recovering = true;
16596
+
16597
+ // If the given error is not the generic one, then we'll add it
16598
+ // here because it will provide more context in addition to the
16599
+ // recoverable error that we will also add.
16600
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16601
+ pm_parser_err_previous(parser, diag_id);
16602
+ }
16603
+
16604
+ // If we get here, then we are assuming this token is closing a
16605
+ // parent context, so we'll indicate that to the user so that
16606
+ // they know how we behaved.
16607
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
16608
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
16609
+ // We're going to make a special case here, because "cannot
16610
+ // parse expression" is pretty generic, and we know here that we
16611
+ // have an unexpected token.
16612
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16613
+ } else {
16614
+ pm_parser_err_previous(parser, diag_id);
16101
16615
  }
16102
16616
 
16103
16617
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16618
+ }
16104
16619
  }
16105
16620
  }
16106
16621
 
@@ -17063,15 +17578,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17063
17578
  */
17064
17579
  static pm_node_t *
17065
17580
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17066
- pm_token_t recovery = parser->previous;
17067
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17581
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
17068
17582
 
17069
17583
  switch (PM_NODE_TYPE(node)) {
17070
17584
  case PM_MISSING_NODE:
17071
17585
  // If we found a syntax error, then the type of node returned by
17072
- // parse_expression_prefix is going to be a missing node. In that
17073
- // case we need to add the error message to the parser's error list.
17074
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17586
+ // parse_expression_prefix is going to be a missing node.
17075
17587
  return node;
17076
17588
  case PM_PRE_EXECUTION_NODE:
17077
17589
  case PM_POST_EXECUTION_NODE:
@@ -17080,7 +17592,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17080
17592
  case PM_UNDEF_NODE:
17081
17593
  // These expressions are statements, and cannot be followed by
17082
17594
  // operators (except modifiers).
17083
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17595
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
17084
17596
  return node;
17085
17597
  }
17086
17598
  break;
@@ -17175,9 +17687,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17175
17687
 
17176
17688
  static pm_node_t *
17177
17689
  parse_program(pm_parser_t *parser) {
17178
- pm_parser_scope_push(parser, !parser->current_scope);
17179
- parser_lex(parser);
17690
+ // If the current scope is NULL, then we want to push a new top level scope.
17691
+ // The current scope could exist in the event that we are parsing an eval
17692
+ // and the user has passed into scopes that already exist.
17693
+ if (parser->current_scope == NULL) {
17694
+ pm_parser_scope_push(parser, true);
17695
+ }
17180
17696
 
17697
+ parser_lex(parser);
17181
17698
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
17182
17699
  if (!statements) {
17183
17700
  statements = pm_statements_node_create(parser);
@@ -17248,8 +17765,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17248
17765
  .in_keyword_arg = false,
17249
17766
  .current_param_name = 0,
17250
17767
  .semantic_token_seen = false,
17251
- .frozen_string_literal = false,
17252
- .suppress_warnings = false
17768
+ .frozen_string_literal = false
17253
17769
  };
17254
17770
 
17255
17771
  // Initialize the constant pool. We're going to completely guess as to the
@@ -17295,10 +17811,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17295
17811
  parser->frozen_string_literal = true;
17296
17812
  }
17297
17813
 
17298
- // suppress_warnings option
17299
- if (options->suppress_warnings) {
17300
- parser->suppress_warnings = true;
17301
- }
17814
+ // version option
17815
+ parser->version = options->version;
17302
17816
 
17303
17817
  // scopes option
17304
17818
  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
@@ -17484,3 +17998,297 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
17484
17998
  #undef PM_LOCATION_NODE_VALUE
17485
17999
  #undef PM_LOCATION_NULL_VALUE
17486
18000
  #undef PM_LOCATION_TOKEN_VALUE
18001
+
18002
+ /** An error that is going to be formatted into the output. */
18003
+ typedef struct {
18004
+ /** A pointer to the diagnostic that was generated during parsing. */
18005
+ pm_diagnostic_t *error;
18006
+
18007
+ /** The start line of the diagnostic message. */
18008
+ uint32_t line;
18009
+
18010
+ /** The column start of the diagnostic message. */
18011
+ uint32_t column_start;
18012
+
18013
+ /** The column end of the diagnostic message. */
18014
+ uint32_t column_end;
18015
+ } pm_error_t;
18016
+
18017
/**
 * The set of strings used to lay out formatted errors. The two length fields
 * cache the lengths of the corresponding strings so that they do not have to
 * be recomputed every time the strings are appended to the output.
 */
typedef struct {
    /** A printf-style format used to print the gutter of a numbered line. */
    const char *number_prefix;

    /** The gutter printed in front of lines that carry no line number. */
    const char *blank_prefix;

    /** The line printed between two non-adjacent excerpts of the source. */
    const char *divider;

    /** The number of bytes in blank_prefix. */
    size_t blank_prefix_length;

    /** The number of bytes in divider. */
    size_t divider_length;
} pm_error_format_t;
18034
+
18035
/*
 * Terminal escape sequences used when the formatted errors are colorized:
 * a 256-color gray foreground, a bold red foreground, and an attribute reset.
 */
#define PM_COLOR_GRAY "\033[38;5;102m"
#define PM_COLOR_RED "\033[1;31m"
#define PM_COLOR_RESET "\033[0m"
18038
+
18039
+ static inline pm_error_t *
18040
+ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18041
+ pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
18042
+
18043
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
18044
+ pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
18045
+ pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
18046
+
18047
+ // We're going to insert this error into the array in sorted order. We
18048
+ // do this by finding the first error that has a line number greater
18049
+ // than the current error and then inserting the current error before
18050
+ // that one.
18051
+ size_t index = 0;
18052
+ while (
18053
+ (index < error_list->size) &&
18054
+ (errors[index].error != NULL) &&
18055
+ (
18056
+ (errors[index].line < ((uint32_t) start.line)) ||
18057
+ (errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
18058
+ )
18059
+ ) index++;
18060
+
18061
+ // Now we're going to shift all of the errors after this one down one
18062
+ // index to make room for the new error.
18063
+ memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18064
+
18065
+ // Finally, we'll insert the error into the array.
18066
+ uint32_t column_end;
18067
+ if (start.line == end.line) {
18068
+ column_end = (uint32_t) end.column;
18069
+ } else {
18070
+ column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
18071
+ }
18072
+
18073
+ // Ensure we have at least one column of error.
18074
+ if (((uint32_t) start.column) == column_end) column_end++;
18075
+
18076
+ errors[index] = (pm_error_t) {
18077
+ .error = error,
18078
+ .line = (uint32_t) start.line,
18079
+ .column_start = (uint32_t) start.column,
18080
+ .column_end = column_end
18081
+ };
18082
+ }
18083
+
18084
+ return errors;
18085
+ }
18086
+
18087
+ static inline void
18088
+ pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
18089
+ const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
18090
+ const uint8_t *end;
18091
+
18092
+ if (line >= newline_list->size) {
18093
+ end = parser->end;
18094
+ } else {
18095
+ end = &parser->start[newline_list->offsets[line]];
18096
+ }
18097
+
18098
+ pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
18099
+ pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
18100
+
18101
+ if (end == parser->end && end[-1] != '\n') {
18102
+ pm_buffer_append_string(buffer, "\n", 1);
18103
+ }
18104
+ }
18105
+
18106
+ /**
18107
+ * Format the errors on the parser into the given buffer.
18108
+ */
18109
+ PRISM_EXPORTED_FUNCTION void
18110
+ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
18111
+ const pm_list_t *error_list = &parser->error_list;
18112
+ assert(error_list->size != 0);
18113
+
18114
+ // First, we're going to sort all of the errors by line number using an
18115
+ // insertion sort into a newly allocated array.
18116
+ const pm_newline_list_t *newline_list = &parser->newline_list;
18117
+ pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
18118
+
18119
+ // Now we're going to determine how we're going to format line numbers and
18120
+ // blank lines based on the maximum number of digits in the line numbers
18121
+ // that are going to be displayed.
18122
+ pm_error_format_t error_format;
18123
+ size_t max_line_number = errors[error_list->size - 1].line;
18124
+
18125
+ if (max_line_number < 10) {
18126
+ if (colorize) {
18127
+ error_format = (pm_error_format_t) {
18128
+ .number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
18129
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18130
+ .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
18131
+ };
18132
+ } else {
18133
+ error_format = (pm_error_format_t) {
18134
+ .number_prefix = "%1" PRIu32 " | ",
18135
+ .blank_prefix = " | ",
18136
+ .divider = " ~~~~~\n"
18137
+ };
18138
+ }
18139
+ } else if (max_line_number < 100) {
18140
+ if (colorize) {
18141
+ error_format = (pm_error_format_t) {
18142
+ .number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
18143
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18144
+ .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
18145
+ };
18146
+ } else {
18147
+ error_format = (pm_error_format_t) {
18148
+ .number_prefix = "%2" PRIu32 " | ",
18149
+ .blank_prefix = " | ",
18150
+ .divider = " ~~~~~~\n"
18151
+ };
18152
+ }
18153
+ } else if (max_line_number < 1000) {
18154
+ if (colorize) {
18155
+ error_format = (pm_error_format_t) {
18156
+ .number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
18157
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18158
+ .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
18159
+ };
18160
+ } else {
18161
+ error_format = (pm_error_format_t) {
18162
+ .number_prefix = "%3" PRIu32 " | ",
18163
+ .blank_prefix = " | ",
18164
+ .divider = " ~~~~~~~\n"
18165
+ };
18166
+ }
18167
+ } else if (max_line_number < 10000) {
18168
+ if (colorize) {
18169
+ error_format = (pm_error_format_t) {
18170
+ .number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
18171
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18172
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18173
+ };
18174
+ } else {
18175
+ error_format = (pm_error_format_t) {
18176
+ .number_prefix = "%4" PRIu32 " | ",
18177
+ .blank_prefix = " | ",
18178
+ .divider = " ~~~~~~~~\n"
18179
+ };
18180
+ }
18181
+ } else {
18182
+ if (colorize) {
18183
+ error_format = (pm_error_format_t) {
18184
+ .number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
18185
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18186
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18187
+ };
18188
+ } else {
18189
+ error_format = (pm_error_format_t) {
18190
+ .number_prefix = "%5" PRIu32 " | ",
18191
+ .blank_prefix = " | ",
18192
+ .divider = " ~~~~~~~~\n"
18193
+ };
18194
+ }
18195
+ }
18196
+
18197
+ error_format.blank_prefix_length = strlen(error_format.blank_prefix);
18198
+ error_format.divider_length = strlen(error_format.divider);
18199
+
18200
+ // Now we're going to iterate through every error in our error list and
18201
+ // display it. While we're iterating, we will display some padding lines of
18202
+ // the source before the error to give some context. We'll be careful not to
18203
+ // display the same line twice in case the errors are close enough in the
18204
+ // source.
18205
+ uint32_t last_line = 0;
18206
+ const pm_encoding_t *encoding = parser->encoding;
18207
+
18208
+ for (size_t index = 0; index < error_list->size; index++) {
18209
+ pm_error_t *error = &errors[index];
18210
+
18211
+ // Here we determine how many lines of padding of the source to display,
18212
+ // based on the difference from the last line that was displayed.
18213
+ if (error->line - last_line > 1) {
18214
+ if (error->line - last_line > 2) {
18215
+ if ((index != 0) && (error->line - last_line > 3)) {
18216
+ pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
18217
+ }
18218
+
18219
+ pm_buffer_append_string(buffer, " ", 2);
18220
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
18221
+ }
18222
+
18223
+ pm_buffer_append_string(buffer, " ", 2);
18224
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
18225
+ }
18226
+
18227
+ // If this is the first error or we're on a new line, then we'll display
18228
+ // the line that has the error in it.
18229
+ if ((index == 0) || (error->line != last_line)) {
18230
+ if (colorize) {
18231
+ pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
18232
+ } else {
18233
+ pm_buffer_append_string(buffer, "> ", 2);
18234
+ }
18235
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
18236
+ }
18237
+
18238
+ // Now we'll display the actual error message. We'll do this by first
18239
+ // putting the prefix to the line, then a bunch of blank spaces
18240
+ // depending on the column, then as many carets as we need to display
18241
+ // the width of the error, then the error message itself.
18242
+ //
18243
+ // Note that this doesn't take into account the width of the actual
18244
+ // character when displayed in the terminal. For some East Asian
18245
+ // languages or emoji, this means it can be thrown off pretty badly. We
18246
+ // will need to solve this eventually.
18247
+ pm_buffer_append_string(buffer, " ", 2);
18248
+ pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
18249
+
18250
+ size_t column = 0;
18251
+ const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
18252
+
18253
+ while (column < error->column_end) {
18254
+ if (column < error->column_start) {
18255
+ pm_buffer_append_byte(buffer, ' ');
18256
+ } else if (colorize) {
18257
+ pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
18258
+ } else {
18259
+ pm_buffer_append_byte(buffer, '^');
18260
+ }
18261
+
18262
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
18263
+ column += (char_width == 0 ? 1 : char_width);
18264
+ }
18265
+
18266
+ pm_buffer_append_byte(buffer, ' ');
18267
+
18268
+ const char *message = error->error->message;
18269
+ pm_buffer_append_string(buffer, message, strlen(message));
18270
+ pm_buffer_append_byte(buffer, '\n');
18271
+
18272
+ // Here we determine how many lines of padding to display after the
18273
+ // error, depending on where the next error is in source.
18274
+ last_line = error->line;
18275
+ size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
18276
+
18277
+ if (next_line - last_line > 1) {
18278
+ pm_buffer_append_string(buffer, " ", 2);
18279
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18280
+ }
18281
+
18282
+ if (next_line - last_line > 1) {
18283
+ pm_buffer_append_string(buffer, " ", 2);
18284
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18285
+ }
18286
+ }
18287
+
18288
+ // Finally, we'll free the array of errors that we allocated.
18289
+ free(errors);
18290
+ }
18291
+
18292
+ #undef PM_COLOR_GRAY // The error formatting code above is done with the PM_COLOR_* macros, so remove their definitions here.
18293
+ #undef PM_COLOR_RED
18294
+ #undef PM_COLOR_RESET