prism 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -1
  3. data/Makefile +1 -1
  4. data/config.yml +429 -2
  5. data/docs/build_system.md +8 -11
  6. data/docs/releasing.md +1 -1
  7. data/docs/relocation.md +34 -0
  8. data/docs/ruby_api.md +1 -1
  9. data/ext/prism/api_node.c +1824 -1305
  10. data/ext/prism/extconf.rb +13 -36
  11. data/ext/prism/extension.c +298 -109
  12. data/ext/prism/extension.h +4 -4
  13. data/include/prism/ast.h +442 -2
  14. data/include/prism/defines.h +26 -8
  15. data/include/prism/options.h +47 -1
  16. data/include/prism/util/pm_buffer.h +10 -0
  17. data/include/prism/version.h +2 -2
  18. data/include/prism.h +51 -4
  19. data/lib/prism/dot_visitor.rb +26 -0
  20. data/lib/prism/dsl.rb +14 -6
  21. data/lib/prism/ffi.rb +93 -28
  22. data/lib/prism/inspect_visitor.rb +4 -1
  23. data/lib/prism/node.rb +1886 -105
  24. data/lib/prism/parse_result/errors.rb +1 -1
  25. data/lib/prism/parse_result/newlines.rb +1 -1
  26. data/lib/prism/parse_result.rb +54 -2
  27. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  28. data/lib/prism/reflection.rb +4 -4
  29. data/lib/prism/relocation.rb +504 -0
  30. data/lib/prism/serialize.rb +1252 -765
  31. data/lib/prism/string_query.rb +30 -0
  32. data/lib/prism/translation/parser/builder.rb +61 -0
  33. data/lib/prism/translation/parser/compiler.rb +228 -162
  34. data/lib/prism/translation/parser/lexer.rb +435 -61
  35. data/lib/prism/translation/parser.rb +51 -3
  36. data/lib/prism/translation/parser35.rb +12 -0
  37. data/lib/prism/translation/ripper.rb +13 -3
  38. data/lib/prism/translation/ruby_parser.rb +17 -7
  39. data/lib/prism/translation.rb +1 -0
  40. data/lib/prism.rb +9 -7
  41. data/prism.gemspec +11 -1
  42. data/rbi/prism/dsl.rbi +10 -7
  43. data/rbi/prism/node.rbi +44 -17
  44. data/rbi/prism/parse_result.rbi +17 -0
  45. data/rbi/prism/string_query.rbi +12 -0
  46. data/rbi/prism/translation/parser35.rbi +6 -0
  47. data/rbi/prism.rbi +39 -36
  48. data/sig/prism/dsl.rbs +6 -4
  49. data/sig/prism/node.rbs +29 -15
  50. data/sig/prism/parse_result.rbs +10 -0
  51. data/sig/prism/relocation.rbs +185 -0
  52. data/sig/prism/serialize.rbs +4 -2
  53. data/sig/prism/string_query.rbs +11 -0
  54. data/sig/prism.rbs +22 -1
  55. data/src/diagnostic.c +2 -2
  56. data/src/node.c +39 -0
  57. data/src/options.c +31 -0
  58. data/src/prettyprint.c +62 -0
  59. data/src/prism.c +738 -199
  60. data/src/regexp.c +7 -3
  61. data/src/serialize.c +18 -0
  62. data/src/static_literals.c +1 -1
  63. data/src/util/pm_buffer.c +40 -0
  64. data/src/util/pm_char.c +1 -1
  65. data/src/util/pm_constant_pool.c +6 -2
  66. data/src/util/pm_string.c +1 -0
  67. data/src/util/pm_strncasecmp.c +13 -1
  68. metadata +13 -7
data/src/prism.c CHANGED
@@ -1649,22 +1649,25 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
1649
1649
  * the function pointer or can just directly use the UTF-8 functions.
1650
1650
  */
1651
1651
  static inline size_t
1652
- char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1652
+ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653
+ if (n <= 0) return 0;
1654
+
1653
1655
  if (parser->encoding_changed) {
1654
1656
  size_t width;
1655
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1657
+
1658
+ if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1656
1659
  return width;
1657
1660
  } else if (*b == '_') {
1658
1661
  return 1;
1659
1662
  } else if (*b >= 0x80) {
1660
- return parser->encoding->char_width(b, parser->end - b);
1663
+ return parser->encoding->char_width(b, n);
1661
1664
  } else {
1662
1665
  return 0;
1663
1666
  }
1664
1667
  } else if (*b < 0x80) {
1665
1668
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666
1669
  } else {
1667
- return pm_encoding_utf_8_char_width(b, parser->end - b);
1670
+ return pm_encoding_utf_8_char_width(b, n);
1668
1671
  }
1669
1672
  }
1670
1673
 
@@ -1673,11 +1676,13 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1673
1676
  * has not been changed.
1674
1677
  */
1675
1678
  static inline size_t
1676
- char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677
- if (*b < 0x80) {
1679
+ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680
+ if (n <= 0) {
1681
+ return 0;
1682
+ } else if (*b < 0x80) {
1678
1683
  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679
1684
  } else {
1680
- return pm_encoding_utf_8_char_width(b, end - b);
1685
+ return pm_encoding_utf_8_char_width(b, n);
1681
1686
  }
1682
1687
  }
1683
1688
 
@@ -1687,20 +1692,24 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1687
1692
  * it's important that it be as fast as possible.
1688
1693
  */
1689
1694
  static inline size_t
1690
- char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691
- if (parser->encoding_changed) {
1695
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696
+ if (n <= 0) {
1697
+ return 0;
1698
+ } else if (parser->encoding_changed) {
1692
1699
  size_t width;
1693
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1700
+
1701
+ if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1694
1702
  return width;
1695
1703
  } else if (*b == '_') {
1696
1704
  return 1;
1697
1705
  } else if (*b >= 0x80) {
1698
- return parser->encoding->char_width(b, parser->end - b);
1706
+ return parser->encoding->char_width(b, n);
1699
1707
  } else {
1700
1708
  return 0;
1701
1709
  }
1710
+ } else {
1711
+ return char_is_identifier_utf8(b, n);
1702
1712
  }
1703
- return char_is_identifier_utf8(b, parser->end);
1704
1713
  }
1705
1714
 
1706
1715
  // Here we're defining a perfect hash for the characters that are allowed in
@@ -1731,9 +1740,10 @@ char_is_global_name_punctuation(const uint8_t b) {
1731
1740
  static inline bool
1732
1741
  token_is_setter_name(pm_token_t *token) {
1733
1742
  return (
1734
- (token->type == PM_TOKEN_IDENTIFIER) &&
1743
+ (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744
+ ((token->type == PM_TOKEN_IDENTIFIER) &&
1735
1745
  (token->end - token->start >= 2) &&
1736
- (token->end[-1] == '=')
1746
+ (token->end[-1] == '='))
1737
1747
  );
1738
1748
  }
1739
1749
 
@@ -2895,7 +2905,7 @@ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2895
2905
  (node->message_loc.start != NULL) &&
2896
2906
  (node->message_loc.end[-1] != '!') &&
2897
2907
  (node->message_loc.end[-1] != '?') &&
2898
- char_is_identifier_start(parser, node->message_loc.start) &&
2908
+ char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2899
2909
  (node->opening_loc.start == NULL) &&
2900
2910
  (node->arguments == NULL) &&
2901
2911
  (node->block == NULL)
@@ -4142,7 +4152,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4142
4152
 
4143
4153
  // If errno is set, then it should only be ERANGE. At this point we need to
4144
4154
  // check if it's infinity (it should be).
4145
- if (errno == ERANGE && isinf(value)) {
4155
+ if (errno == ERANGE && PRISM_ISINF(value)) {
4146
4156
  int warn_width;
4147
4157
  const char *ellipsis;
4148
4158
 
@@ -5318,6 +5328,12 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
5318
5328
  // should clear the mutability flags.
5319
5329
  CLEAR_FLAGS(node);
5320
5330
  break;
5331
+ case PM_X_STRING_NODE:
5332
+ case PM_INTERPOLATED_X_STRING_NODE:
5333
+ // If this is an x string, then this is a syntax error. But we want
5334
+ // to handle it here so that we don't fail the assertion.
5335
+ CLEAR_FLAGS(node);
5336
+ break;
5321
5337
  default:
5322
5338
  assert(false && "unexpected node type");
5323
5339
  break;
@@ -5652,7 +5668,7 @@ pm_lambda_node_create(
5652
5668
  */
5653
5669
  static pm_local_variable_and_write_node_t *
5654
5670
  pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5655
- assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5671
+ assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5656
5672
  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5657
5673
  pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5658
5674
 
@@ -5707,7 +5723,7 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5707
5723
  */
5708
5724
  static pm_local_variable_or_write_node_t *
5709
5725
  pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5710
- assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5726
+ assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5711
5727
  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5712
5728
  pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5713
5729
 
@@ -6159,7 +6175,10 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
6159
6175
  const uint8_t *end = token->end;
6160
6176
 
6161
6177
  ptrdiff_t diff = end - start;
6162
- assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6178
+ assert(diff > 0);
6179
+ #if PTRDIFF_MAX > SIZE_MAX
6180
+ assert(diff < (ptrdiff_t) SIZE_MAX);
6181
+ #endif
6163
6182
  size_t length = (size_t) diff;
6164
6183
 
6165
6184
  char *digits = xcalloc(length + 1, sizeof(char));
@@ -6393,12 +6412,13 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
6393
6412
  * Allocate and initialize new ParenthesesNode node.
6394
6413
  */
6395
6414
  static pm_parentheses_node_t *
6396
- pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6415
+ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6397
6416
  pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6398
6417
 
6399
6418
  *node = (pm_parentheses_node_t) {
6400
6419
  {
6401
6420
  .type = PM_PARENTHESES_NODE,
6421
+ .flags = flags,
6402
6422
  .node_id = PM_NODE_IDENTIFY(parser),
6403
6423
  .location = {
6404
6424
  .start = opening->start,
@@ -6665,6 +6685,7 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6665
6685
  },
6666
6686
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667
6687
  .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6688
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6668
6689
  .reference = NULL,
6669
6690
  .statements = NULL,
6670
6691
  .subsequent = NULL,
@@ -7684,7 +7705,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
7684
7705
  * Allocate a new UntilNode node.
7685
7706
  */
7686
7707
  static pm_until_node_t *
7687
- pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7708
+ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688
7709
  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689
7710
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690
7711
 
@@ -7699,6 +7720,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7699
7720
  },
7700
7721
  },
7701
7722
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7723
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7702
7724
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7703
7725
  .predicate = predicate,
7704
7726
  .statements = statements
@@ -7727,6 +7749,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7727
7749
  },
7728
7750
  },
7729
7751
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7752
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7730
7753
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7731
7754
  .predicate = predicate,
7732
7755
  .statements = statements
@@ -7794,7 +7817,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
7794
7817
  * Allocate a new WhileNode node.
7795
7818
  */
7796
7819
  static pm_while_node_t *
7797
- pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7820
+ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7798
7821
  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7799
7822
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7800
7823
 
@@ -7809,6 +7832,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7809
7832
  },
7810
7833
  },
7811
7834
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7835
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7812
7836
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7813
7837
  .predicate = predicate,
7814
7838
  .statements = statements
@@ -7837,6 +7861,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7837
7861
  },
7838
7862
  },
7839
7863
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7864
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7840
7865
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7841
7866
  .predicate = predicate,
7842
7867
  .statements = statements
@@ -7859,6 +7884,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7859
7884
  .location = PM_LOCATION_NULL_VALUE(parser)
7860
7885
  },
7861
7886
  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7887
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7862
7888
  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7863
7889
  .predicate = predicate,
7864
7890
  .statements = statements
@@ -9077,10 +9103,10 @@ lex_global_variable(pm_parser_t *parser) {
9077
9103
  parser->current.end++;
9078
9104
  size_t width;
9079
9105
 
9080
- if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
9106
+ if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9081
9107
  do {
9082
9108
  parser->current.end += width;
9083
- } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9109
+ } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9084
9110
 
9085
9111
  // $0 isn't allowed to be followed by anything.
9086
9112
  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
@@ -9105,14 +9131,14 @@ lex_global_variable(pm_parser_t *parser) {
9105
9131
  case '-':
9106
9132
  parser->current.end++;
9107
9133
  allow_multiple = false;
9108
- /* fallthrough */
9134
+ PRISM_FALLTHROUGH
9109
9135
  default: {
9110
9136
  size_t width;
9111
9137
 
9112
- if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9138
+ if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9113
9139
  do {
9114
9140
  parser->current.end += width;
9115
- } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9141
+ } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9116
9142
  } else if (pm_char_is_whitespace(peek(parser))) {
9117
9143
  // If we get here, then we have a $ followed by whitespace,
9118
9144
  // which is not allowed.
@@ -9177,11 +9203,11 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9177
9203
  bool encoding_changed = parser->encoding_changed;
9178
9204
 
9179
9205
  if (encoding_changed) {
9180
- while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9206
+ while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9181
9207
  current_end += width;
9182
9208
  }
9183
9209
  } else {
9184
- while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9210
+ while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9185
9211
  current_end += width;
9186
9212
  }
9187
9213
  }
@@ -9355,7 +9381,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9355
9381
  const uint8_t *variable = pound + 2;
9356
9382
  if (*variable == '@' && pound + 3 < parser->end) variable++;
9357
9383
 
9358
- if (char_is_identifier_start(parser, variable)) {
9384
+ if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9359
9385
  // At this point we're sure that we've either hit an embedded instance
9360
9386
  // or class variable. In this case we'll first need to check if we've
9361
9387
  // already consumed content.
@@ -9404,7 +9430,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9404
9430
  // or a global name punctuation character, then we've hit an embedded
9405
9431
  // global variable.
9406
9432
  if (
9407
- char_is_identifier_start(parser, check) ||
9433
+ char_is_identifier_start(parser, check, parser->end - check) ||
9408
9434
  (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9409
9435
  ) {
9410
9436
  // In this case we've hit an embedded global variable. First check to
@@ -9536,21 +9562,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
9536
9562
  parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
9537
9563
  }
9538
9564
 
9539
- if (value <= 0x7F) { // 0xxxxxxx
9540
- pm_buffer_append_byte(buffer, (uint8_t) value);
9541
- } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9542
- pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9543
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9544
- } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9545
- pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9546
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9547
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9548
- } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9549
- pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9550
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9551
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9552
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9553
- } else {
9565
+ if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9554
9566
  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9555
9567
  pm_buffer_append_byte(buffer, 0xEF);
9556
9568
  pm_buffer_append_byte(buffer, 0xBF);
@@ -9575,28 +9587,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
9575
9587
  pm_buffer_append_byte(buffer, byte);
9576
9588
  }
9577
9589
 
9578
- /**
9579
- * Write each byte of the given escaped character into the buffer.
9580
- */
9581
- static inline void
9582
- escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9583
- size_t width;
9584
- if (parser->encoding_changed) {
9585
- width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9586
- } else {
9587
- width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9588
- }
9589
-
9590
- // TODO: If the character is invalid in the given encoding, then we'll just
9591
- // push one byte into the buffer. This should actually be an error.
9592
- width = (width == 0) ? 1 : width;
9593
-
9594
- for (size_t index = 0; index < width; index++) {
9595
- escape_write_byte_encoded(parser, buffer, *parser->current.end);
9596
- parser->current.end++;
9597
- }
9598
- }
9599
-
9600
9590
  /**
9601
9591
  * The regular expression engine doesn't support the same escape sequences as
9602
9592
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -9621,6 +9611,33 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular
9621
9611
  escape_write_byte_encoded(parser, buffer, byte);
9622
9612
  }
9623
9613
 
9614
+ /**
9615
+ * Write each byte of the given escaped character into the buffer.
9616
+ */
9617
+ static inline void
9618
+ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9619
+ size_t width;
9620
+ if (parser->encoding_changed) {
9621
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9622
+ } else {
9623
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9624
+ }
9625
+
9626
+ if (width == 1) {
9627
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9628
+ } else if (width > 1) {
9629
+ // Valid multibyte character. Just ignore escape.
9630
+ pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9631
+ pm_buffer_append_bytes(b, parser->current.end, width);
9632
+ parser->current.end += width;
9633
+ } else {
9634
+ // Assume the next character wasn't meant to be part of this escape
9635
+ // sequence since it is invalid. Add an error and move on.
9636
+ parser->current.end++;
9637
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9638
+ }
9639
+ }
9640
+
9624
9641
  /**
9625
9642
  * Warn about using a space or a tab character in an escape, as opposed to using
9626
9643
  * \\s or \\t. Note that we can quite copy the source because the warning
@@ -9647,7 +9664,8 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
9647
9664
  */
9648
9665
  static void
9649
9666
  escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9650
- switch (peek(parser)) {
9667
+ uint8_t peeked = peek(parser);
9668
+ switch (peeked) {
9651
9669
  case '\\': {
9652
9670
  parser->current.end++;
9653
9671
  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
@@ -9717,6 +9735,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9717
9735
  }
9718
9736
  }
9719
9737
 
9738
+ value = escape_byte(value, flags);
9720
9739
  escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9721
9740
  return;
9722
9741
  }
@@ -9765,7 +9784,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9765
9784
 
9766
9785
  size_t whitespace;
9767
9786
  while (true) {
9768
- if ((whitespace = pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9787
+ if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9769
9788
  parser->current.end += whitespace;
9770
9789
  } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9771
9790
  // This is super hacky, but it gets us nicer error
@@ -9813,7 +9832,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9813
9832
  uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9814
9833
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9815
9834
 
9816
- parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
9835
+ parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9817
9836
  }
9818
9837
 
9819
9838
  // ?\u{nnnn} character literal should contain only one codepoint
@@ -10041,11 +10060,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10041
10060
  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042
10061
  return;
10043
10062
  }
10063
+ PRISM_FALLTHROUGH
10044
10064
  }
10045
- /* fallthrough */
10046
10065
  default: {
10066
+ if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10067
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10068
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10069
+ return;
10070
+ }
10047
10071
  if (parser->current.end < parser->end) {
10048
- escape_write_escape_encoded(parser, buffer);
10072
+ escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10049
10073
  } else {
10050
10074
  pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10051
10075
  }
@@ -10118,7 +10142,7 @@ lex_question_mark(pm_parser_t *parser) {
10118
10142
  !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10119
10143
  (
10120
10144
  (parser->current.end + encoding_width >= parser->end) ||
10121
- !char_is_identifier(parser, parser->current.end + encoding_width)
10145
+ !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10122
10146
  )
10123
10147
  ) {
10124
10148
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -10138,21 +10162,22 @@ lex_question_mark(pm_parser_t *parser) {
10138
10162
  static pm_token_type_t
10139
10163
  lex_at_variable(pm_parser_t *parser) {
10140
10164
  pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10141
- size_t width;
10165
+ const uint8_t *end = parser->end;
10142
10166
 
10143
- if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10167
+ size_t width;
10168
+ if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10144
10169
  parser->current.end += width;
10145
10170
 
10146
- while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10171
+ while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10147
10172
  parser->current.end += width;
10148
10173
  }
10149
- } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10174
+ } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10150
10175
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10151
10176
  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10152
10177
  diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10153
10178
  }
10154
10179
 
10155
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10180
+ size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10156
10181
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10157
10182
  } else {
10158
10183
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
@@ -10496,6 +10521,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10496
10521
  }
10497
10522
 
10498
10523
  const uint8_t *end = parser->current.end - 1;
10524
+ assert(end >= start);
10499
10525
  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10500
10526
 
10501
10527
  token_buffer->cursor = end;
@@ -10576,9 +10602,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
10576
10602
  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10577
10603
  }
10578
10604
 
10579
- const uint8_t delimiter = *parser->current.end;
10580
- parser->current.end += eol_length;
10605
+ uint8_t delimiter = *parser->current.end;
10606
+
10607
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
10608
+ // For example, %\r\nfoo\r\n should be "foo"
10609
+ if (eol_length == 2) {
10610
+ delimiter = *(parser->current.end + 1);
10611
+ }
10581
10612
 
10613
+ parser->current.end += eol_length;
10582
10614
  return delimiter;
10583
10615
  }
10584
10616
 
@@ -10688,6 +10720,14 @@ parser_lex(pm_parser_t *parser) {
10688
10720
  // We'll check if we're at the end of the file. If we are, then we
10689
10721
  // need to return the EOF token.
10690
10722
  if (parser->current.end >= parser->end) {
10723
+ // If we hit EOF, but the EOF came immediately after a newline,
10724
+ // set the start of the token to the newline. This way any EOF
10725
+ // errors will be reported as happening on that line rather than
10726
+ // a line after. For example "foo(\n" should report an error
10727
+ // on line 1 even though EOF technically occurs on line 2.
10728
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10729
+ parser->current.start -= 1;
10730
+ }
10691
10731
  LEX(PM_TOKEN_EOF);
10692
10732
  }
10693
10733
 
@@ -10730,7 +10770,7 @@ parser_lex(pm_parser_t *parser) {
10730
10770
 
10731
10771
  lexed_comment = true;
10732
10772
  }
10733
- /* fallthrough */
10773
+ PRISM_FALLTHROUGH
10734
10774
  case '\r':
10735
10775
  case '\n': {
10736
10776
  parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10772,7 +10812,7 @@ parser_lex(pm_parser_t *parser) {
10772
10812
  parser->current.type = PM_TOKEN_NEWLINE;
10773
10813
  return;
10774
10814
  }
10775
- /* fallthrough */
10815
+ PRISM_FALLTHROUGH
10776
10816
  case PM_IGNORED_NEWLINE_ALL:
10777
10817
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10778
10818
  lexed_comment = false;
@@ -10869,6 +10909,10 @@ parser_lex(pm_parser_t *parser) {
10869
10909
 
10870
10910
  // ,
10871
10911
  case ',':
10912
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10913
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10914
+ }
10915
+
10872
10916
  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10873
10917
  LEX(PM_TOKEN_COMMA);
10874
10918
 
@@ -11121,13 +11165,13 @@ parser_lex(pm_parser_t *parser) {
11121
11165
 
11122
11166
  if (parser->current.end >= parser->end) {
11123
11167
  parser->current.end = end;
11124
- } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11168
+ } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11125
11169
  parser->current.end = end;
11126
11170
  } else {
11127
11171
  if (quote == PM_HEREDOC_QUOTE_NONE) {
11128
11172
  parser->current.end += width;
11129
11173
 
11130
- while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11174
+ while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11131
11175
  parser->current.end += width;
11132
11176
  }
11133
11177
  } else {
@@ -11312,7 +11356,7 @@ parser_lex(pm_parser_t *parser) {
11312
11356
  } else {
11313
11357
  const uint8_t delim = peek_offset(parser, 1);
11314
11358
 
11315
- if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11359
+ if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11316
11360
  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11317
11361
  }
11318
11362
  }
@@ -11750,7 +11794,7 @@ parser_lex(pm_parser_t *parser) {
11750
11794
 
11751
11795
  default: {
11752
11796
  if (*parser->current.start != '_') {
11753
- size_t width = char_is_identifier_start(parser, parser->current.start);
11797
+ size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11754
11798
 
11755
11799
  // If this isn't the beginning of an identifier, then
11756
11800
  // it's an invalid token as we've exhausted all of the
@@ -11783,7 +11827,7 @@ parser_lex(pm_parser_t *parser) {
11783
11827
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784
11828
  break;
11785
11829
  }
11786
- /* fallthrough */
11830
+ PRISM_FALLTHROUGH
11787
11831
  default:
11788
11832
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789
11833
  break;
@@ -11980,7 +12024,7 @@ parser_lex(pm_parser_t *parser) {
11980
12024
  pm_token_buffer_push_byte(&token_buffer, '\r');
11981
12025
  break;
11982
12026
  }
11983
- /* fallthrough */
12027
+ PRISM_FALLTHROUGH
11984
12028
  case '\n':
11985
12029
  pm_token_buffer_push_byte(&token_buffer, '\n');
11986
12030
 
@@ -12084,9 +12128,28 @@ parser_lex(pm_parser_t *parser) {
12084
12128
  pm_regexp_token_buffer_t token_buffer = { 0 };
12085
12129
 
12086
12130
  while (breakpoint != NULL) {
12131
+ uint8_t term = lex_mode->as.regexp.terminator;
12132
+ bool is_terminator = (*breakpoint == term);
12133
+
12134
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12135
+ // For example: `%\nfoo\r\n`
12136
+ // The string should be "foo", not "foo\r"
12137
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12138
+ if (term == '\n') {
12139
+ is_terminator = true;
12140
+ }
12141
+
12142
+ // If the terminator is a CR, but we see a CRLF, we need to
12143
+ // treat the CRLF as a newline, meaning this is _not_ the
12144
+ // terminator
12145
+ if (term == '\r') {
12146
+ is_terminator = false;
12147
+ }
12148
+ }
12149
+
12087
12150
  // If we hit the terminator, we need to determine what kind of
12088
12151
  // token to return.
12089
- if (*breakpoint == lex_mode->as.regexp.terminator) {
12152
+ if (is_terminator) {
12090
12153
  if (lex_mode->as.regexp.nesting > 0) {
12091
12154
  parser->current.end = breakpoint + 1;
12092
12155
  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12211,7 @@ parser_lex(pm_parser_t *parser) {
12148
12211
  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149
12212
  token_buffer.base.cursor = breakpoint;
12150
12213
 
12151
- /* fallthrough */
12214
+ PRISM_FALLTHROUGH
12152
12215
  case '\n':
12153
12216
  // If we've hit a newline, then we need to track that in
12154
12217
  // the list of newlines.
@@ -12190,7 +12253,7 @@ parser_lex(pm_parser_t *parser) {
12190
12253
  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191
12254
  break;
12192
12255
  }
12193
- /* fallthrough */
12256
+ PRISM_FALLTHROUGH
12194
12257
  case '\n':
12195
12258
  if (parser->heredoc_end) {
12196
12259
  // ... if we are on the same line as a heredoc,
@@ -12316,10 +12379,29 @@ parser_lex(pm_parser_t *parser) {
12316
12379
  continue;
12317
12380
  }
12318
12381
 
12382
+ uint8_t term = lex_mode->as.string.terminator;
12383
+ bool is_terminator = (*breakpoint == term);
12384
+
12385
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12386
+ // For example: `%r\nfoo\r\n`
12387
+ // The string should be /foo/, not /foo\r/
12388
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12389
+ if (term == '\n') {
12390
+ is_terminator = true;
12391
+ }
12392
+
12393
+ // If the terminator is a CR, but we see a CRLF, we need to
12394
+ // treat the CRLF as a newline, meaning this is _not_ the
12395
+ // terminator
12396
+ if (term == '\r') {
12397
+ is_terminator = false;
12398
+ }
12399
+ }
12400
+
12319
12401
  // Note that we have to check the terminator here first because we could
12320
12402
  // potentially be parsing a % string that has a # character as the
12321
12403
  // terminator.
12322
- if (*breakpoint == lex_mode->as.string.terminator) {
12404
+ if (is_terminator) {
12323
12405
  // If this terminator doesn't actually close the string, then we need
12324
12406
  // to continue on past it.
12325
12407
  if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12461,7 @@ parser_lex(pm_parser_t *parser) {
12379
12461
  pm_token_buffer_escape(parser, &token_buffer);
12380
12462
  token_buffer.cursor = breakpoint;
12381
12463
 
12382
- /* fallthrough */
12464
+ PRISM_FALLTHROUGH
12383
12465
  case '\n':
12384
12466
  // When we hit a newline, we need to flush any potential
12385
12467
  // heredocs. Note that this has to happen after we check
@@ -12424,7 +12506,7 @@ parser_lex(pm_parser_t *parser) {
12424
12506
  pm_token_buffer_push_byte(&token_buffer, '\r');
12425
12507
  break;
12426
12508
  }
12427
- /* fallthrough */
12509
+ PRISM_FALLTHROUGH
12428
12510
  case '\n':
12429
12511
  if (!lex_mode->as.string.interpolation) {
12430
12512
  pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12632,7 +12714,7 @@ parser_lex(pm_parser_t *parser) {
12632
12714
  pm_token_buffer_escape(parser, &token_buffer);
12633
12715
  token_buffer.cursor = breakpoint;
12634
12716
 
12635
- /* fallthrough */
12717
+ PRISM_FALLTHROUGH
12636
12718
  case '\n': {
12637
12719
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12638
12720
  parser_flush_heredoc_end(parser);
@@ -12732,7 +12814,7 @@ parser_lex(pm_parser_t *parser) {
12732
12814
  pm_token_buffer_push_byte(&token_buffer, '\r');
12733
12815
  break;
12734
12816
  }
12735
- /* fallthrough */
12817
+ PRISM_FALLTHROUGH
12736
12818
  case '\n':
12737
12819
  pm_token_buffer_push_byte(&token_buffer, '\\');
12738
12820
  pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,7 +12834,7 @@ parser_lex(pm_parser_t *parser) {
12752
12834
  pm_token_buffer_push_byte(&token_buffer, '\r');
12753
12835
  break;
12754
12836
  }
12755
- /* fallthrough */
12837
+ PRISM_FALLTHROUGH
12756
12838
  case '\n':
12757
12839
  // If we are in a tilde here, we should
12758
12840
  // break out of the loop and return the
@@ -12903,7 +12985,7 @@ typedef struct {
12903
12985
 
12904
12986
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12905
12987
  // rescue
12906
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
12988
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12907
12989
 
12908
12990
  // if unless until while
12909
12991
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
@@ -13044,14 +13126,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13044
13126
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045
13127
  }
13046
13128
 
13047
- /**
13048
- * Returns true if the current token is any of the six given types.
13049
- */
13050
- static inline bool
13051
- match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053
- }
13054
-
13055
13129
  /**
13056
13130
  * Returns true if the current token is any of the seven given types.
13057
13131
  */
@@ -13068,6 +13142,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13068
13142
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069
13143
  }
13070
13144
 
13145
+ /**
13146
+ * Returns true if the current token is any of the nine given types.
13147
+ */
13148
+ static inline bool
13149
+ match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13150
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13151
+ }
13152
+
13071
13153
  /**
13072
13154
  * If the current token is of the specified type, lex forward by one token and
13073
13155
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13178,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13096
13178
  return false;
13097
13179
  }
13098
13180
 
13099
- /**
13100
- * If the current token is any of the three given types, lex forward by one
13101
- * token and return true. Otherwise return false.
13102
- */
13103
- static inline bool
13104
- accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105
- if (match3(parser, type1, type2, type3)) {
13106
- parser_lex(parser);
13107
- return true;
13108
- }
13109
- return false;
13110
- }
13111
-
13112
13181
  /**
13113
13182
  * This function indicates that the parser expects a token in a specific
13114
13183
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,20 +13215,6 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
13146
13215
  parser->previous.type = PM_TOKEN_MISSING;
13147
13216
  }
13148
13217
 
13149
- /**
13150
- * This function is the same as expect2, but it expects one of three token types.
13151
- */
13152
- static void
13153
- expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154
- if (accept3(parser, type1, type2, type3)) return;
13155
-
13156
- const uint8_t *location = parser->previous.end;
13157
- pm_parser_err(parser, location, location, diag_id);
13158
-
13159
- parser->previous.start = location;
13160
- parser->previous.type = PM_TOKEN_MISSING;
13161
- }
13162
-
13163
13218
  /**
13164
13219
  * A special expect1 that expects a heredoc terminator and handles popping the
13165
13220
  * lex mode accordingly.
@@ -13501,7 +13556,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
13501
13556
  return (pm_node_t *) pm_index_target_node_create(parser, call);
13502
13557
  }
13503
13558
  }
13504
- /* fallthrough */
13559
+ PRISM_FALLTHROUGH
13505
13560
  default:
13506
13561
  // In this case we have a node that we don't know how to convert
13507
13562
  // into a target. We need to treat it as an error. For now, we'll
@@ -13583,7 +13638,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13583
13638
  case PM_BACK_REFERENCE_READ_NODE:
13584
13639
  case PM_NUMBERED_REFERENCE_READ_NODE:
13585
13640
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13586
- /* fallthrough */
13641
+ PRISM_FALLTHROUGH
13587
13642
  case PM_GLOBAL_VARIABLE_READ_NODE: {
13588
13643
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13589
13644
  pm_node_destroy(parser, target);
@@ -13673,7 +13728,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13673
13728
  return target;
13674
13729
  }
13675
13730
 
13676
- if (char_is_identifier_start(parser, call->message_loc.start)) {
13731
+ if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13677
13732
  // When we get here, we have a method call, because it was
13678
13733
  // previously marked as a method call but now we have an =. This
13679
13734
  // looks like:
@@ -13710,6 +13765,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13710
13765
 
13711
13766
  // Replace the name with "[]=".
13712
13767
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13768
+
13769
+ // Ensure that the arguments for []= don't contain keywords
13770
+ pm_index_arguments_check(parser, call->arguments, call->block);
13713
13771
  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714
13772
 
13715
13773
  return target;
@@ -13722,7 +13780,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13722
13780
  // is no way for us to attach it to the tree at this point.
13723
13781
  pm_node_destroy(parser, value);
13724
13782
  }
13725
- /* fallthrough */
13783
+ PRISM_FALLTHROUGH
13726
13784
  default:
13727
13785
  // In this case we have a node that we don't know how to convert into a
13728
13786
  // target. We need to treat it as an error. For now, we'll mark it as an
@@ -13898,6 +13956,15 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13898
13956
  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13899
13957
  parser_lex(parser);
13900
13958
 
13959
+ // If we are at the end of the file, then we need to stop parsing
13960
+ // the statements entirely at this point. Mark the parser as
13961
+ // recovering, as we know that EOF closes the top-level context, and
13962
+ // then break out of the loop.
13963
+ if (match1(parser, PM_TOKEN_EOF)) {
13964
+ parser->recovering = true;
13965
+ break;
13966
+ }
13967
+
13901
13968
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13902
13969
  if (context_terminator(context, &parser->current)) break;
13903
13970
  } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
@@ -14191,6 +14258,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14191
14258
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14192
14259
  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14193
14260
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14261
+ if (parsed_bare_hash) {
14262
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14263
+ }
14194
14264
  } else {
14195
14265
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14196
14266
 
@@ -14239,7 +14309,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14239
14309
  }
14240
14310
  }
14241
14311
  }
14242
- /* fallthrough */
14312
+ PRISM_FALLTHROUGH
14243
14313
  default: {
14244
14314
  if (argument == NULL) {
14245
14315
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14482,6 +14552,7 @@ parse_parameters(
14482
14552
  bool allows_trailing_comma,
14483
14553
  bool allows_forwarding_parameters,
14484
14554
  bool accepts_blocks_in_defaults,
14555
+ bool in_block,
14485
14556
  uint16_t depth
14486
14557
  ) {
14487
14558
  pm_do_loop_stack_push(parser, false);
@@ -14646,7 +14717,7 @@ parse_parameters(
14646
14717
  break;
14647
14718
  }
14648
14719
  case PM_TOKEN_LABEL: {
14649
- if (!uses_parentheses) parser->in_keyword_arg = true;
14720
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14650
14721
  update_parameter_state(parser, &parser->current, &order);
14651
14722
 
14652
14723
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -15009,8 +15080,8 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15009
15080
  case PM_TOKEN_NEWLINE:
15010
15081
  case PM_TOKEN_SEMICOLON:
15011
15082
  case PM_TOKEN_KEYWORD_THEN:
15012
- // Here we have a terminator for the rescue keyword, in which case we're
15013
- // going to just continue on.
15083
+ // Here we have a terminator for the rescue keyword, in which
15084
+ // case we're going to just continue on.
15014
15085
  break;
15015
15086
  default: {
15016
15087
  if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
@@ -15042,9 +15113,12 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15042
15113
  }
15043
15114
 
15044
15115
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15045
- accept1(parser, PM_TOKEN_KEYWORD_THEN);
15116
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15117
+ rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15118
+ }
15046
15119
  } else {
15047
15120
  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15121
+ rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15048
15122
  }
15049
15123
 
15050
15124
  if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
@@ -15115,7 +15189,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15115
15189
  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15116
15190
  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15117
15191
  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15118
- default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15192
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15119
15193
  }
15120
15194
 
15121
15195
  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15210,6 +15284,7 @@ parse_block_parameters(
15210
15284
  allows_trailing_comma,
15211
15285
  false,
15212
15286
  accepts_blocks_in_defaults,
15287
+ true,
15213
15288
  (uint16_t) (depth + 1)
15214
15289
  );
15215
15290
  }
@@ -16125,7 +16200,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
16125
16200
  case PM_TOKEN_TILDE:
16126
16201
  case PM_TOKEN_BANG:
16127
16202
  if (name->end[-1] == '@') return name->end - 1;
16128
- /* fallthrough */
16203
+ PRISM_FALLTHROUGH
16129
16204
  default:
16130
16205
  return name->end;
16131
16206
  }
@@ -16381,14 +16456,15 @@ static pm_node_t *
16381
16456
  parse_variable(pm_parser_t *parser) {
16382
16457
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16383
16458
  int depth;
16459
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16384
16460
 
16385
- if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16461
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16386
16462
  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16387
16463
  }
16388
16464
 
16389
16465
  pm_scope_t *current_scope = parser->current_scope;
16390
16466
  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16391
- if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16467
+ if (is_numbered_param) {
16392
16468
  // When you use a numbered parameter, it implies the existence of
16393
16469
  // all of the locals that exist before it. For example, referencing
16394
16470
  // _2 means that _1 must exist. Therefore here we loop through all
@@ -16758,6 +16834,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
16758
16834
  // If we haven't already created our container for concatenation,
16759
16835
  // we'll do that now.
16760
16836
  if (!concating) {
16837
+ if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16838
+ pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16839
+ }
16840
+
16761
16841
  concating = true;
16762
16842
  pm_token_t bounds = not_provided(parser);
16763
16843
 
@@ -16996,7 +17076,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
16996
17076
  if (length == 0) return false;
16997
17077
 
16998
17078
  // First ensure that it starts with a valid identifier starting character.
16999
- size_t width = char_is_identifier_start(parser, start);
17079
+ size_t width = char_is_identifier_start(parser, start, end - start);
17000
17080
  if (width == 0) return false;
17001
17081
 
17002
17082
  // Next, ensure that it's not an uppercase character.
@@ -17009,7 +17089,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
17009
17089
  // Next, iterate through all of the bytes of the string to ensure that they
17010
17090
  // are all valid identifier characters.
17011
17091
  const uint8_t *cursor = start + width;
17012
- while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17092
+ while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17013
17093
  return cursor == end;
17014
17094
  }
17015
17095
 
@@ -17096,7 +17176,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17096
17176
  break;
17097
17177
  }
17098
17178
  }
17099
- /* fallthrough */
17179
+ PRISM_FALLTHROUGH
17100
17180
  default: {
17101
17181
  // If we get anything else, then this is an error. For this we'll
17102
17182
  // create a missing node for the value and create an assoc node for
@@ -17482,7 +17562,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
17482
17562
  pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17483
17563
  accept1(parser, PM_TOKEN_NEWLINE);
17484
17564
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17485
- pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17565
+ pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17486
17566
 
17487
17567
  if (node == NULL) {
17488
17568
  node = right;
@@ -17592,7 +17672,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17592
17672
  break;
17593
17673
  }
17594
17674
  }
17595
- /* fallthrough */
17675
+ PRISM_FALLTHROUGH
17596
17676
  default:
17597
17677
  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17598
17678
  break;
@@ -17614,7 +17694,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17614
17694
  // Gather up all of the patterns into the list.
17615
17695
  while (accept1(parser, PM_TOKEN_COMMA)) {
17616
17696
  // Break early here in case we have a trailing comma.
17617
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
17697
+ if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17618
17698
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17619
17699
  pm_node_list_append(&nodes, node);
17620
17700
  trailing_rest = true;
@@ -18105,12 +18185,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18105
18185
  case PM_TOKEN_PARENTHESIS_LEFT:
18106
18186
  case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18107
18187
  pm_token_t opening = parser->current;
18188
+ pm_node_flags_t flags = 0;
18108
18189
 
18109
18190
  pm_node_list_t current_block_exits = { 0 };
18110
18191
  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18111
18192
 
18112
18193
  parser_lex(parser);
18113
- while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18194
+ while (true) {
18195
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18196
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18197
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18198
+ break;
18199
+ }
18200
+ }
18114
18201
 
18115
18202
  // If this is the end of the file or we match a right parenthesis, then
18116
18203
  // we have an empty parentheses node, and we can immediately return.
@@ -18120,7 +18207,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18120
18207
  pop_block_exits(parser, previous_block_exits);
18121
18208
  pm_node_list_free(&current_block_exits);
18122
18209
 
18123
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18210
+ return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18124
18211
  }
18125
18212
 
18126
18213
  // Otherwise, we're going to parse the first statement in the list
@@ -18133,9 +18220,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18133
18220
  // Determine if this statement is followed by a terminator. In the
18134
18221
  // case of a single statement, this is fine. But in the case of
18135
18222
  // multiple statements it's required.
18136
- bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18223
+ bool terminator_found = false;
18224
+
18225
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18226
+ terminator_found = true;
18227
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18228
+ } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18229
+ terminator_found = true;
18230
+ }
18231
+
18137
18232
  if (terminator_found) {
18138
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18233
+ while (true) {
18234
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18235
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18236
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18237
+ break;
18238
+ }
18239
+ }
18139
18240
  }
18140
18241
 
18141
18242
  // If we hit a right parenthesis, then we're done parsing the
@@ -18207,13 +18308,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18207
18308
  pm_statements_node_t *statements = pm_statements_node_create(parser);
18208
18309
  pm_statements_node_body_append(parser, statements, statement, true);
18209
18310
 
18210
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18311
+ return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18211
18312
  }
18212
18313
 
18213
18314
  // If we have more than one statement in the set of parentheses,
18214
18315
  // then we are going to parse all of them as a list of statements.
18215
18316
  // We'll do that here.
18216
18317
  context_push(parser, PM_CONTEXT_PARENS);
18318
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18319
+
18217
18320
  pm_statements_node_t *statements = pm_statements_node_create(parser);
18218
18321
  pm_statements_node_body_append(parser, statements, statement, true);
18219
18322
 
@@ -18290,7 +18393,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18290
18393
  pm_node_list_free(&current_block_exits);
18291
18394
 
18292
18395
  pm_void_statements_check(parser, statements, true);
18293
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18396
+ return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18294
18397
  }
18295
18398
  case PM_TOKEN_BRACE_LEFT: {
18296
18399
  // If we were passed a current_hash_keys via the parser, then that
@@ -18722,7 +18825,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
18825
  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18723
18826
  }
18724
18827
  }
18725
- /* fallthrough */
18828
+ PRISM_FALLTHROUGH
18726
18829
  default:
18727
18830
  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18728
18831
  }
@@ -19213,6 +19316,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19213
19316
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19214
19317
  parser_lex(parser);
19215
19318
 
19319
+ // This will be false if the method name is not a valid identifier
19320
+ // but could be followed by an operator.
19321
+ bool valid_name = true;
19322
+
19216
19323
  switch (parser->current.type) {
19217
19324
  case PM_CASE_OPERATOR:
19218
19325
  pm_parser_scope_push(parser, true);
@@ -19242,10 +19349,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19242
19349
 
19243
19350
  break;
19244
19351
  }
19245
- case PM_TOKEN_CONSTANT:
19246
19352
  case PM_TOKEN_INSTANCE_VARIABLE:
19247
19353
  case PM_TOKEN_CLASS_VARIABLE:
19248
19354
  case PM_TOKEN_GLOBAL_VARIABLE:
19355
+ valid_name = false;
19356
+ PRISM_FALLTHROUGH
19357
+ case PM_TOKEN_CONSTANT:
19249
19358
  case PM_TOKEN_KEYWORD_NIL:
19250
19359
  case PM_TOKEN_KEYWORD_SELF:
19251
19360
  case PM_TOKEN_KEYWORD_TRUE:
@@ -19303,6 +19412,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19303
19412
 
19304
19413
  name = parse_method_definition_name(parser);
19305
19414
  } else {
19415
+ if (!valid_name) {
19416
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19417
+ }
19418
+
19306
19419
  name = identifier;
19307
19420
  }
19308
19421
  break;
@@ -19326,7 +19439,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19326
19439
  expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19327
19440
 
19328
19441
  operator = parser->previous;
19329
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19442
+ receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19330
19443
 
19331
19444
  // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
19332
19445
  // reason as described the above.
@@ -19353,7 +19466,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19353
19466
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19354
19467
  params = NULL;
19355
19468
  } else {
19356
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19469
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19357
19470
  }
19358
19471
 
19359
19472
  lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19378,7 +19491,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19378
19491
 
19379
19492
  lparen = not_provided(parser);
19380
19493
  rparen = not_provided(parser);
19381
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19494
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19382
19495
 
19383
19496
  context_pop(parser);
19384
19497
  break;
@@ -19413,7 +19526,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19413
19526
  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19414
19527
 
19415
19528
  pm_token_t rescue_keyword = parser->previous;
19416
- pm_node_t *value = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19529
+ pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19417
19530
  context_pop(parser);
19418
19531
 
19419
19532
  statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
@@ -19656,11 +19769,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19656
19769
  accept1(parser, PM_TOKEN_NEWLINE);
19657
19770
 
19658
19771
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19659
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19772
+ pm_token_t lparen = parser->previous;
19660
19773
 
19661
19774
  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19662
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19775
+ receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19663
19776
  } else {
19777
+ arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19664
19778
  receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19665
19779
 
19666
19780
  if (!parser->recovering) {
@@ -19787,9 +19901,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19787
19901
  pm_do_loop_stack_pop(parser);
19788
19902
  context_pop(parser);
19789
19903
 
19790
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19791
- pm_statements_node_t *statements = NULL;
19904
+ pm_token_t do_keyword;
19905
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19906
+ do_keyword = parser->previous;
19907
+ } else {
19908
+ do_keyword = not_provided(parser);
19909
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19910
+ }
19792
19911
 
19912
+ pm_statements_node_t *statements = NULL;
19793
19913
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19794
19914
  pm_accepts_block_stack_push(parser, true);
19795
19915
  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19800,7 +19920,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19800
19920
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19801
19921
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19802
19922
 
19803
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19923
+ return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19804
19924
  }
19805
19925
  case PM_TOKEN_KEYWORD_WHILE: {
19806
19926
  size_t opening_newline_index = token_newline_index(parser);
@@ -19815,9 +19935,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19815
19935
  pm_do_loop_stack_pop(parser);
19816
19936
  context_pop(parser);
19817
19937
 
19818
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19819
- pm_statements_node_t *statements = NULL;
19938
+ pm_token_t do_keyword;
19939
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19940
+ do_keyword = parser->previous;
19941
+ } else {
19942
+ do_keyword = not_provided(parser);
19943
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19944
+ }
19820
19945
 
19946
+ pm_statements_node_t *statements = NULL;
19821
19947
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19822
19948
  pm_accepts_block_stack_push(parser, true);
19823
19949
  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19828,7 +19954,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19828
19954
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19829
19955
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19830
19956
 
19831
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19957
+ return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19832
19958
  }
19833
19959
  case PM_TOKEN_PERCENT_LOWER_I: {
19834
19960
  parser_lex(parser);
@@ -20621,7 +20747,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
20621
20747
  pm_token_t rescue = parser->current;
20622
20748
  parser_lex(parser);
20623
20749
 
20624
- pm_node_t *right = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20750
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20625
20751
  context_pop(parser);
20626
20752
 
20627
20753
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
@@ -20727,7 +20853,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
20727
20853
  }
20728
20854
  }
20729
20855
 
20730
- pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20856
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20731
20857
  context_pop(parser);
20732
20858
 
20733
20859
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
@@ -20783,6 +20909,123 @@ typedef struct {
20783
20909
  bool shared;
20784
20910
  } parse_regular_expression_named_capture_data_t;
20785
20911
 
20912
+ static inline const uint8_t *
20913
+ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20914
+ cursor++;
20915
+
20916
+ if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20917
+ uint8_t value = escape_hexadecimal_digit(*cursor);
20918
+ cursor++;
20919
+
20920
+ if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20921
+ value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20922
+ cursor++;
20923
+ }
20924
+
20925
+ pm_buffer_append_byte(unescaped, value);
20926
+ } else {
20927
+ pm_buffer_append_string(unescaped, "\\x", 2);
20928
+ }
20929
+
20930
+ return cursor;
20931
+ }
20932
+
20933
+ static inline const uint8_t *
20934
+ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20935
+ uint8_t value = (uint8_t) (*cursor - '0');
20936
+ cursor++;
20937
+
20938
+ if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20939
+ value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20940
+ cursor++;
20941
+
20942
+ if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20943
+ value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20944
+ cursor++;
20945
+ }
20946
+ }
20947
+
20948
+ pm_buffer_append_byte(unescaped, value);
20949
+ return cursor;
20950
+ }
20951
+
20952
+ static inline const uint8_t *
20953
+ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20954
+ const uint8_t *start = cursor - 1;
20955
+ cursor++;
20956
+
20957
+ if (cursor >= end) {
20958
+ pm_buffer_append_string(unescaped, "\\u", 2);
20959
+ return cursor;
20960
+ }
20961
+
20962
+ if (*cursor != '{') {
20963
+ size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20964
+ uint32_t value = escape_unicode(parser, cursor, length);
20965
+
20966
+ if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20967
+ pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20968
+ }
20969
+
20970
+ return cursor + length;
20971
+ }
20972
+
20973
+ cursor++;
20974
+ for (;;) {
20975
+ while (cursor < end && *cursor == ' ') cursor++;
20976
+
20977
+ if (cursor >= end) break;
20978
+ if (*cursor == '}') {
20979
+ cursor++;
20980
+ break;
20981
+ }
20982
+
20983
+ size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20984
+ uint32_t value = escape_unicode(parser, cursor, length);
20985
+
20986
+ (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20987
+ cursor += length;
20988
+ }
20989
+
20990
+ return cursor;
20991
+ }
20992
+
20993
+ static void
20994
+ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
20995
+ const uint8_t *end = source + length;
20996
+ pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20997
+
20998
+ for (;;) {
20999
+ if (++cursor >= end) {
21000
+ pm_buffer_append_byte(unescaped, '\\');
21001
+ return;
21002
+ }
21003
+
21004
+ switch (*cursor) {
21005
+ case 'x':
21006
+ cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21007
+ break;
21008
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21009
+ cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21010
+ break;
21011
+ case 'u':
21012
+ cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21013
+ break;
21014
+ default:
21015
+ pm_buffer_append_byte(unescaped, '\\');
21016
+ break;
21017
+ }
21018
+
21019
+ const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21020
+ if (next_cursor == NULL) break;
21021
+
21022
+ pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21023
+ cursor = next_cursor;
21024
+ }
21025
+
21026
+ pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21027
+ }
21028
+
20786
21029
  /**
20787
21030
  * This callback is called when the regular expression parser encounters a named
20788
21031
  * capture group.
@@ -20797,13 +21040,32 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20797
21040
 
20798
21041
  const uint8_t *source = pm_string_source(capture);
20799
21042
  size_t length = pm_string_length(capture);
21043
+ pm_buffer_t unescaped = { 0 };
21044
+
21045
+ // First, we need to handle escapes within the name of the capture group.
21046
+ // This is because regular expressions have three different representations
21047
+ // in prism. The first is the plain source code. The second is the
21048
+ // representation that will be sent to the regular expression engine, which
21049
+ // is the value of the "unescaped" field. This is poorly named, because it
21050
+ // actually still contains escapes, just a subset of them that the regular
21051
+ // expression engine knows how to handle. The third representation is fully
21052
+ // unescaped, which is what we need.
21053
+ const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21054
+ if (PRISM_UNLIKELY(cursor != NULL)) {
21055
+ pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21056
+ source = (const uint8_t *) pm_buffer_value(&unescaped);
21057
+ length = pm_buffer_length(&unescaped);
21058
+ }
20800
21059
 
20801
21060
  pm_location_t location;
20802
21061
  pm_constant_id_t name;
20803
21062
 
20804
21063
  // If the name of the capture group isn't a valid identifier, we do
20805
21064
  // not add it to the local table.
20806
- if (!pm_slice_is_valid_local(parser, source, source + length)) return;
21065
+ if (!pm_slice_is_valid_local(parser, source, source + length)) {
21066
+ pm_buffer_free(&unescaped);
21067
+ return;
21068
+ }
20807
21069
 
20808
21070
  if (callback_data->shared) {
20809
21071
  // If the unescaped string is a slice of the source, then we can
@@ -20831,7 +21093,10 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20831
21093
  if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20832
21094
  // If the local is not already a local but it is a keyword, then we
20833
21095
  // do not want to add a capture for this.
20834
- if (pm_local_is_keyword((const char *) source, length)) return;
21096
+ if (pm_local_is_keyword((const char *) source, length)) {
21097
+ pm_buffer_free(&unescaped);
21098
+ return;
21099
+ }
20835
21100
 
20836
21101
  // If the identifier is not already a local, then we will add it to
20837
21102
  // the local table.
@@ -20849,6 +21114,8 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20849
21114
  pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20850
21115
  pm_node_list_append(&callback_data->match->targets, target);
20851
21116
  }
21117
+
21118
+ pm_buffer_free(&unescaped);
20852
21119
  }
20853
21120
 
20854
21121
  /**
@@ -20898,7 +21165,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20898
21165
  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20899
21166
  }
20900
21167
  }
20901
- /* fallthrough */
21168
+ PRISM_FALLTHROUGH
20902
21169
  case PM_CASE_WRITABLE: {
20903
21170
  parser_lex(parser);
20904
21171
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20944,7 +21211,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20944
21211
  case PM_BACK_REFERENCE_READ_NODE:
20945
21212
  case PM_NUMBERED_REFERENCE_READ_NODE:
20946
21213
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20947
- /* fallthrough */
21214
+ PRISM_FALLTHROUGH
20948
21215
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20949
21216
  parser_lex(parser);
20950
21217
 
@@ -20989,7 +21256,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20989
21256
  pm_node_destroy(parser, node);
20990
21257
  return result;
20991
21258
  }
21259
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21260
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21261
+ parser_lex(parser);
21262
+
21263
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21264
+ pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21265
+
21266
+ parse_target_implicit_parameter(parser, node);
21267
+ pm_node_destroy(parser, node);
21268
+ return result;
21269
+ }
20992
21270
  case PM_LOCAL_VARIABLE_READ_NODE: {
21271
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21272
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21273
+ parse_target_implicit_parameter(parser, node);
21274
+ }
21275
+
20993
21276
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
20994
21277
  parser_lex(parser);
20995
21278
 
@@ -21062,7 +21345,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21062
21345
  case PM_BACK_REFERENCE_READ_NODE:
21063
21346
  case PM_NUMBERED_REFERENCE_READ_NODE:
21064
21347
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21065
- /* fallthrough */
21348
+ PRISM_FALLTHROUGH
21066
21349
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21067
21350
  parser_lex(parser);
21068
21351
 
@@ -21107,7 +21390,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21107
21390
  pm_node_destroy(parser, node);
21108
21391
  return result;
21109
21392
  }
21393
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21394
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21395
+ parser_lex(parser);
21396
+
21397
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21398
+ pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21399
+
21400
+ parse_target_implicit_parameter(parser, node);
21401
+ pm_node_destroy(parser, node);
21402
+ return result;
21403
+ }
21110
21404
  case PM_LOCAL_VARIABLE_READ_NODE: {
21405
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21406
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21407
+ parse_target_implicit_parameter(parser, node);
21408
+ }
21409
+
21111
21410
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
21112
21411
  parser_lex(parser);
21113
21412
 
@@ -21190,7 +21489,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21190
21489
  case PM_BACK_REFERENCE_READ_NODE:
21191
21490
  case PM_NUMBERED_REFERENCE_READ_NODE:
21192
21491
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21193
- /* fallthrough */
21492
+ PRISM_FALLTHROUGH
21194
21493
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21195
21494
  parser_lex(parser);
21196
21495
 
@@ -21235,7 +21534,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21235
21534
  pm_node_destroy(parser, node);
21236
21535
  return result;
21237
21536
  }
21537
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21538
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21539
+ parser_lex(parser);
21540
+
21541
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21542
+ pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21543
+
21544
+ parse_target_implicit_parameter(parser, node);
21545
+ pm_node_destroy(parser, node);
21546
+ return result;
21547
+ }
21238
21548
  case PM_LOCAL_VARIABLE_READ_NODE: {
21549
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21550
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21551
+ parse_target_implicit_parameter(parser, node);
21552
+ }
21553
+
21239
21554
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
21240
21555
  parser_lex(parser);
21241
21556
 
@@ -21400,6 +21715,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21400
21715
  case PM_TOKEN_STAR:
21401
21716
  case PM_TOKEN_STAR_STAR: {
21402
21717
  parser_lex(parser);
21718
+ pm_token_t operator = parser->previous;
21719
+ switch (PM_NODE_TYPE(node)) {
21720
+ case PM_RESCUE_MODIFIER_NODE: {
21721
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21722
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21723
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21724
+ }
21725
+ break;
21726
+ }
21727
+ case PM_AND_NODE: {
21728
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21729
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21730
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21731
+ }
21732
+ break;
21733
+ }
21734
+ case PM_OR_NODE: {
21735
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21736
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21737
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21738
+ }
21739
+ break;
21740
+ }
21741
+ default:
21742
+ break;
21743
+ }
21744
+
21403
21745
  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21404
21746
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21405
21747
  }
@@ -21427,6 +21769,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21427
21769
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21428
21770
  }
21429
21771
 
21772
+ switch (PM_NODE_TYPE(node)) {
21773
+ case PM_RESCUE_MODIFIER_NODE: {
21774
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21775
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21776
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21777
+ }
21778
+ break;
21779
+ }
21780
+ case PM_AND_NODE: {
21781
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21782
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21783
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21784
+ }
21785
+ break;
21786
+ }
21787
+ case PM_OR_NODE: {
21788
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21789
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21790
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21791
+ }
21792
+ break;
21793
+ }
21794
+ default:
21795
+ break;
21796
+ }
21797
+
21430
21798
  pm_token_t message;
21431
21799
 
21432
21800
  switch (parser->current.type) {
@@ -21774,6 +22142,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21774
22142
  if (pm_symbol_node_label_p(node)) {
21775
22143
  return node;
21776
22144
  }
22145
+ break;
21777
22146
  default:
21778
22147
  break;
21779
22148
  }
@@ -21915,6 +22284,10 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21915
22284
  static pm_statements_node_t *
21916
22285
  wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21917
22286
  if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22287
+ if (statements == NULL) {
22288
+ statements = pm_statements_node_create(parser);
22289
+ }
22290
+
21918
22291
  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21919
22292
  pm_arguments_node_arguments_append(
21920
22293
  arguments,
@@ -21930,6 +22303,10 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21930
22303
 
21931
22304
  if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21932
22305
  if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22306
+ if (statements == NULL) {
22307
+ statements = pm_statements_node_create(parser);
22308
+ }
22309
+
21933
22310
  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21934
22311
  pm_arguments_node_arguments_append(
21935
22312
  arguments,
@@ -21998,9 +22375,7 @@ parse_program(pm_parser_t *parser) {
21998
22375
  parser_lex(parser);
21999
22376
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22000
22377
 
22001
- if (statements == NULL) {
22002
- statements = pm_statements_node_create(parser);
22003
- } else if (!parser->parsing_eval) {
22378
+ if (statements != NULL && !parser->parsing_eval) {
22004
22379
  // If we have statements, then the top-level statement should be
22005
22380
  // explicitly checked as well. We have to do this here because
22006
22381
  // everywhere else we check all but the last statement.
@@ -22012,13 +22387,6 @@ parse_program(pm_parser_t *parser) {
22012
22387
  pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22013
22388
  pm_parser_scope_pop(parser);
22014
22389
 
22015
- // If this is an empty file, then we're still going to parse all of the
22016
- // statements in order to gather up all of the comments and such. Here we'll
22017
- // correct the location information.
22018
- if (pm_statements_node_body_length(statements) == 0) {
22019
- pm_statements_node_location_set(statements, parser->start, parser->start);
22020
- }
22021
-
22022
22390
  // At the top level, see if we need to wrap the statements in a program
22023
22391
  // node with a while loop based on the options.
22024
22392
  if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
@@ -22028,6 +22396,14 @@ parse_program(pm_parser_t *parser) {
22028
22396
  pm_node_list_free(&current_block_exits);
22029
22397
  }
22030
22398
 
22399
+ // If this is an empty file, then we're still going to parse all of the
22400
+ // statements in order to gather up all of the comments and such. Here we'll
22401
+ // correct the location information.
22402
+ if (statements == NULL) {
22403
+ statements = pm_statements_node_create(parser);
22404
+ pm_statements_node_location_set(statements, parser->start, parser->start);
22405
+ }
22406
+
22031
22407
  return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22032
22408
  }
22033
22409
 
@@ -22221,7 +22597,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
22221
22597
 
22222
22598
  // Scopes given from the outside are not allowed to have numbered
22223
22599
  // parameters.
22224
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22600
+ parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22225
22601
 
22226
22602
  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22227
22603
  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -22431,11 +22807,11 @@ pm_parse(pm_parser_t *parser) {
22431
22807
  * otherwise return true.
22432
22808
  */
22433
22809
  static bool
22434
- pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22810
+ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
22435
22811
  #define LINE_SIZE 4096
22436
22812
  char line[LINE_SIZE];
22437
22813
 
22438
- while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22814
+ while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22439
22815
  size_t length = LINE_SIZE;
22440
22816
  while (length > 0 && line[length - 1] == '\n') length--;
22441
22817
 
@@ -22502,16 +22878,16 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22502
22878
  * can stream stdin in to Ruby so we need to support a streaming API.
22503
22879
  */
22504
22880
  PRISM_EXPORTED_FUNCTION pm_node_t *
22505
- pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22881
+ pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
22506
22882
  pm_buffer_init(buffer);
22507
22883
 
22508
- bool eof = pm_parse_stream_read(buffer, stream, fgets);
22884
+ bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22509
22885
  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22510
22886
  pm_node_t *node = pm_parse(parser);
22511
22887
 
22512
22888
  while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22513
22889
  pm_node_destroy(parser, node);
22514
- eof = pm_parse_stream_read(buffer, stream, fgets);
22890
+ eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22515
22891
 
22516
22892
  pm_parser_free(parser);
22517
22893
  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
@@ -22603,13 +22979,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
22603
22979
  * given stream into to the given buffer.
22604
22980
  */
22605
22981
  PRISM_EXPORTED_FUNCTION void
22606
- pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22982
+ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
22607
22983
  pm_parser_t parser;
22608
22984
  pm_options_t options = { 0 };
22609
22985
  pm_options_read(&options, data);
22610
22986
 
22611
22987
  pm_buffer_t parser_buffer;
22612
- pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22988
+ pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
22613
22989
  pm_serialize_header(buffer);
22614
22990
  pm_serialize_content(&parser, node, buffer);
22615
22991
  pm_buffer_append_byte(buffer, '\0');
@@ -22643,3 +23019,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
22643
23019
  }
22644
23020
 
22645
23021
  #endif
23022
+
23023
+ /******************************************************************************/
23024
+ /* Slice queries for the Ruby API */
23025
+ /******************************************************************************/
23026
+
23027
/**
 * Classification assigned to a slice of source by pm_slice_type.
 */
typedef enum {
    /** The encoding name supplied with the slice could not be resolved. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not fall into any of the categories below. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23044
+
23045
+ /**
23046
+ * Check that the slice is a valid local variable name or constant.
23047
+ */
23048
+ pm_slice_type_t
23049
+ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23050
+ // first, get the right encoding object
23051
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23052
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23053
+
23054
+ // check that there is at least one character
23055
+ if (length == 0) return PM_SLICE_TYPE_NONE;
23056
+
23057
+ size_t width;
23058
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23059
+ // valid because alphabetical
23060
+ } else if (*source == '_') {
23061
+ // valid because underscore
23062
+ width = 1;
23063
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23064
+ // valid because multibyte
23065
+ } else {
23066
+ // invalid because no match
23067
+ return PM_SLICE_TYPE_NONE;
23068
+ }
23069
+
23070
+ // determine the type of the slice based on the first character
23071
+ const uint8_t *end = source + length;
23072
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23073
+
23074
+ // next, iterate through all of the bytes of the string to ensure that they
23075
+ // are all valid identifier characters
23076
+ source += width;
23077
+
23078
+ while (source < end) {
23079
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
23080
+ // valid because alphanumeric
23081
+ source += width;
23082
+ } else if (*source == '_') {
23083
+ // valid because underscore
23084
+ source++;
23085
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23086
+ // valid because multibyte
23087
+ source += width;
23088
+ } else {
23089
+ // invalid because no match
23090
+ break;
23091
+ }
23092
+ }
23093
+
23094
+ // accept a ! or ? at the end of the slice as a method name
23095
+ if (*source == '!' || *source == '?' || *source == '=') {
23096
+ source++;
23097
+ result = PM_SLICE_TYPE_METHOD_NAME;
23098
+ }
23099
+
23100
+ // valid if we are at the end of the slice
23101
+ return source == end ? result : PM_SLICE_TYPE_NONE;
23102
+ }
23103
+
23104
+ /**
23105
+ * Check that the slice is a valid local variable name.
23106
+ */
23107
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
23108
+ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23109
+ switch (pm_slice_type(source, length, encoding_name)) {
23110
+ case PM_SLICE_TYPE_ERROR:
23111
+ return PM_STRING_QUERY_ERROR;
23112
+ case PM_SLICE_TYPE_NONE:
23113
+ case PM_SLICE_TYPE_CONSTANT:
23114
+ case PM_SLICE_TYPE_METHOD_NAME:
23115
+ return PM_STRING_QUERY_FALSE;
23116
+ case PM_SLICE_TYPE_LOCAL:
23117
+ return PM_STRING_QUERY_TRUE;
23118
+ }
23119
+
23120
+ assert(false && "unreachable");
23121
+ return PM_STRING_QUERY_FALSE;
23122
+ }
23123
+
23124
+ /**
23125
+ * Check that the slice is a valid constant name.
23126
+ */
23127
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
23128
+ pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23129
+ switch (pm_slice_type(source, length, encoding_name)) {
23130
+ case PM_SLICE_TYPE_ERROR:
23131
+ return PM_STRING_QUERY_ERROR;
23132
+ case PM_SLICE_TYPE_NONE:
23133
+ case PM_SLICE_TYPE_LOCAL:
23134
+ case PM_SLICE_TYPE_METHOD_NAME:
23135
+ return PM_STRING_QUERY_FALSE;
23136
+ case PM_SLICE_TYPE_CONSTANT:
23137
+ return PM_STRING_QUERY_TRUE;
23138
+ }
23139
+
23140
+ assert(false && "unreachable");
23141
+ return PM_STRING_QUERY_FALSE;
23142
+ }
23143
+
23144
+ /**
23145
+ * Check that the slice is a valid method name.
23146
+ */
23147
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
23148
+ pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23149
+ #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23150
+ #define C1(c) (*source == c)
23151
+ #define C2(s) (memcmp(source, s, 2) == 0)
23152
+ #define C3(s) (memcmp(source, s, 3) == 0)
23153
+
23154
+ switch (pm_slice_type(source, length, encoding_name)) {
23155
+ case PM_SLICE_TYPE_ERROR:
23156
+ return PM_STRING_QUERY_ERROR;
23157
+ case PM_SLICE_TYPE_NONE:
23158
+ break;
23159
+ case PM_SLICE_TYPE_LOCAL:
23160
+ // numbered parameters are not valid method names
23161
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23162
+ case PM_SLICE_TYPE_CONSTANT:
23163
+ // all constants are valid method names
23164
+ case PM_SLICE_TYPE_METHOD_NAME:
23165
+ // all method names are valid method names
23166
+ return PM_STRING_QUERY_TRUE;
23167
+ }
23168
+
23169
+ switch (length) {
23170
+ case 1:
23171
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23172
+ case 2:
23173
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23174
+ case 3:
23175
+ return B(C3("===") || C3("<=>") || C3("[]="));
23176
+ default:
23177
+ return PM_STRING_QUERY_FALSE;
23178
+ }
23179
+
23180
+ #undef B
23181
+ #undef C1
23182
+ #undef C2
23183
+ #undef C3
23184
+ }