prism 1.1.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -1
  3. data/Makefile +1 -1
  4. data/config.yml +422 -3
  5. data/docs/build_system.md +8 -11
  6. data/docs/relocation.md +34 -0
  7. data/ext/prism/api_node.c +18 -10
  8. data/ext/prism/extconf.rb +13 -36
  9. data/ext/prism/extension.c +68 -0
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +427 -3
  12. data/include/prism/defines.h +22 -7
  13. data/include/prism/diagnostic.h +1 -0
  14. data/include/prism/parser.h +25 -12
  15. data/include/prism/version.h +2 -2
  16. data/include/prism.h +47 -0
  17. data/lib/prism/dot_visitor.rb +10 -0
  18. data/lib/prism/dsl.rb +4 -4
  19. data/lib/prism/ffi.rb +49 -2
  20. data/lib/prism/inspect_visitor.rb +2 -0
  21. data/lib/prism/node.rb +1839 -96
  22. data/lib/prism/parse_result/errors.rb +1 -1
  23. data/lib/prism/parse_result.rb +140 -3
  24. data/lib/prism/reflection.rb +2 -2
  25. data/lib/prism/relocation.rb +504 -0
  26. data/lib/prism/serialize.rb +17 -5
  27. data/lib/prism/string_query.rb +30 -0
  28. data/lib/prism/translation/parser/compiler.rb +36 -26
  29. data/lib/prism/translation/parser.rb +3 -3
  30. data/lib/prism/translation/ripper.rb +1 -5
  31. data/lib/prism/translation/ruby_parser.rb +14 -5
  32. data/lib/prism.rb +6 -4
  33. data/prism.gemspec +7 -1
  34. data/rbi/prism/dsl.rbi +4 -4
  35. data/rbi/prism/node.rbi +5118 -1030
  36. data/rbi/prism/parse_result.rbi +29 -0
  37. data/rbi/prism/string_query.rbi +12 -0
  38. data/rbi/prism.rbi +34 -34
  39. data/sig/prism/dsl.rbs +2 -2
  40. data/sig/prism/node.rbs +13 -98
  41. data/sig/prism/parse_result.rbs +20 -0
  42. data/sig/prism/relocation.rbs +185 -0
  43. data/sig/prism/string_query.rbs +11 -0
  44. data/src/diagnostic.c +3 -1
  45. data/src/node.c +18 -0
  46. data/src/prettyprint.c +32 -0
  47. data/src/prism.c +586 -195
  48. data/src/regexp.c +7 -3
  49. data/src/serialize.c +12 -0
  50. data/src/static_literals.c +1 -1
  51. data/src/util/pm_char.c +1 -1
  52. data/src/util/pm_string.c +1 -0
  53. metadata +9 -3
data/src/prism.c CHANGED
@@ -544,10 +544,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
544
544
  * token.
545
545
  */
546
546
  static void
547
- pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
548
- const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
549
- size_t ident_length = lex_mode->as.heredoc.ident_length;
550
-
547
+ pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
551
548
  PM_PARSER_ERR_FORMAT(
552
549
  parser,
553
550
  ident_start,
@@ -964,7 +961,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
964
961
  if (local->name != PM_CONSTANT_ID_UNSET) {
965
962
  pm_constant_id_list_insert(list, (size_t) local->index, local->name);
966
963
 
967
- if (warn_unused && local->reads == 0) {
964
+ if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
968
965
  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
969
966
 
970
967
  if (constant->length >= 1 && *constant->start != '_') {
@@ -2110,14 +2107,6 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2110
2107
  return node;
2111
2108
  }
2112
2109
 
2113
- /**
2114
- * Return the size of the given array node.
2115
- */
2116
- static inline size_t
2117
- pm_array_node_size(pm_array_node_t *node) {
2118
- return node->elements.size;
2119
- }
2120
-
2121
2110
  /**
2122
2111
  * Append an argument to an array node.
2123
2112
  */
@@ -4153,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4153
4142
 
4154
4143
  // If errno is set, then it should only be ERANGE. At this point we need to
4155
4144
  // check if it's infinity (it should be).
4156
- if (errno == ERANGE && isinf(value)) {
4145
+ if (errno == ERANGE && PRISM_ISINF(value)) {
4157
4146
  int warn_width;
4158
4147
  const char *ellipsis;
4159
4148
 
@@ -7695,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
7695
7684
  * Allocate a new UntilNode node.
7696
7685
  */
7697
7686
  static pm_until_node_t *
7698
- pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7687
+ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7699
7688
  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7700
7689
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7701
7690
 
@@ -7710,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7710
7699
  },
7711
7700
  },
7712
7701
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7713
7703
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7714
7704
  .predicate = predicate,
7715
7705
  .statements = statements
@@ -7738,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7738
7728
  },
7739
7729
  },
7740
7730
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7741
7732
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7742
7733
  .predicate = predicate,
7743
7734
  .statements = statements
@@ -7805,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
7805
7796
  * Allocate a new WhileNode node.
7806
7797
  */
7807
7798
  static pm_while_node_t *
7808
- pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7799
+ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7809
7800
  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7810
7801
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7811
7802
 
@@ -7820,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7820
7811
  },
7821
7812
  },
7822
7813
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7823
7815
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7824
7816
  .predicate = predicate,
7825
7817
  .statements = statements
@@ -7848,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7848
7840
  },
7849
7841
  },
7850
7842
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7851
7844
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7852
7845
  .predicate = predicate,
7853
7846
  .statements = statements
@@ -7870,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7870
7863
  .location = PM_LOCATION_NULL_VALUE(parser)
7871
7864
  },
7872
7865
  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7873
7867
  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7874
7868
  .predicate = predicate,
7875
7869
  .statements = statements
@@ -8573,6 +8567,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
8573
8567
  case PM_CONTEXT_MAIN:
8574
8568
  case PM_CONTEXT_DEF_PARAMS:
8575
8569
  case PM_CONTEXT_DEFINED:
8570
+ case PM_CONTEXT_MULTI_TARGET:
8576
8571
  case PM_CONTEXT_TERNARY:
8577
8572
  case PM_CONTEXT_RESCUE_MODIFIER:
8578
8573
  return token->type == PM_TOKEN_EOF;
@@ -8777,6 +8772,7 @@ context_human(pm_context_t context) {
8777
8772
  case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8778
8773
  case PM_CONTEXT_MAIN: return "top level context";
8779
8774
  case PM_CONTEXT_MODULE: return "module definition";
8775
+ case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8780
8776
  case PM_CONTEXT_PARENS: return "parentheses";
8781
8777
  case PM_CONTEXT_POSTEXE: return "'END' block";
8782
8778
  case PM_CONTEXT_PREDICATE: return "predicate";
@@ -9051,6 +9047,10 @@ lex_global_variable(pm_parser_t *parser) {
9051
9047
  return PM_TOKEN_GLOBAL_VARIABLE;
9052
9048
  }
9053
9049
 
9050
+ // True if multiple characters are allowed after the declaration of the
9051
+ // global variable. Not true when it starts with "$-".
9052
+ bool allow_multiple = true;
9053
+
9054
9054
  switch (*parser->current.end) {
9055
9055
  case '~': // $~: match-data
9056
9056
  case '*': // $*: argv
@@ -9109,14 +9109,15 @@ lex_global_variable(pm_parser_t *parser) {
9109
9109
 
9110
9110
  case '-':
9111
9111
  parser->current.end++;
9112
- /* fallthrough */
9112
+ allow_multiple = false;
9113
+ PRISM_FALLTHROUGH
9113
9114
  default: {
9114
9115
  size_t width;
9115
9116
 
9116
9117
  if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9117
9118
  do {
9118
9119
  parser->current.end += width;
9119
- } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9120
+ } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9120
9121
  } else if (pm_char_is_whitespace(peek(parser))) {
9121
9122
  // If we get here, then we have a $ followed by whitespace,
9122
9123
  // which is not allowed.
@@ -9881,6 +9882,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9881
9882
  }
9882
9883
  case 'c': {
9883
9884
  parser->current.end++;
9885
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
9886
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9887
+ }
9888
+
9884
9889
  if (parser->current.end == parser->end) {
9885
9890
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9886
9891
  return;
@@ -9894,10 +9899,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9894
9899
  return;
9895
9900
  }
9896
9901
  case '\\':
9897
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
9898
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9899
- return;
9900
- }
9901
9902
  parser->current.end++;
9902
9903
 
9903
9904
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -9931,6 +9932,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9931
9932
  }
9932
9933
  case 'C': {
9933
9934
  parser->current.end++;
9935
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
9936
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9937
+ }
9938
+
9934
9939
  if (peek(parser) != '-') {
9935
9940
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9936
9941
  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
@@ -9951,10 +9956,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9951
9956
  return;
9952
9957
  }
9953
9958
  case '\\':
9954
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
9955
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9956
- return;
9957
- }
9958
9959
  parser->current.end++;
9959
9960
 
9960
9961
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -9989,6 +9990,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9989
9990
  }
9990
9991
  case 'M': {
9991
9992
  parser->current.end++;
9993
+ if (flags & PM_ESCAPE_FLAG_META) {
9994
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9995
+ }
9996
+
9992
9997
  if (peek(parser) != '-') {
9993
9998
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9994
9999
  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
@@ -10004,10 +10009,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10004
10009
  uint8_t peeked = peek(parser);
10005
10010
  switch (peeked) {
10006
10011
  case '\\':
10007
- if (flags & PM_ESCAPE_FLAG_META) {
10008
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10009
- return;
10010
- }
10011
10012
  parser->current.end++;
10012
10013
 
10013
10014
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -10045,11 +10046,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10045
10046
  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10046
10047
  return;
10047
10048
  }
10049
+ PRISM_FALLTHROUGH
10048
10050
  }
10049
- /* fallthrough */
10050
10051
  default: {
10051
10052
  if (parser->current.end < parser->end) {
10052
10053
  escape_write_escape_encoded(parser, buffer);
10054
+ } else {
10055
+ pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10053
10056
  }
10054
10057
  return;
10055
10058
  }
@@ -10498,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10498
10501
  }
10499
10502
 
10500
10503
  const uint8_t *end = parser->current.end - 1;
10504
+ assert(end >= start);
10501
10505
  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10502
10506
 
10503
10507
  token_buffer->cursor = end;
@@ -10578,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
10578
10582
  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10579
10583
  }
10580
10584
 
10581
- const uint8_t delimiter = *parser->current.end;
10582
- parser->current.end += eol_length;
10585
+ uint8_t delimiter = *parser->current.end;
10583
10586
 
10587
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
10588
+ // For example, %\r\nfoo\r\n should be "foo"
10589
+ if (eol_length == 2) {
10590
+ delimiter = *(parser->current.end + 1);
10591
+ }
10592
+
10593
+ parser->current.end += eol_length;
10584
10594
  return delimiter;
10585
10595
  }
10586
10596
 
@@ -10690,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
10690
10700
  // We'll check if we're at the end of the file. If we are, then we
10691
10701
  // need to return the EOF token.
10692
10702
  if (parser->current.end >= parser->end) {
10703
+ // If we hit EOF, but the EOF came immediately after a newline,
10704
+ // set the start of the token to the newline. This way any EOF
10705
+ // errors will be reported as happening on that line rather than
10706
+ // a line after. For example "foo(\n" should report an error
10707
+ // on line 1 even though EOF technically occurs on line 2.
10708
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709
+ parser->current.start -= 1;
10710
+ }
10693
10711
  LEX(PM_TOKEN_EOF);
10694
10712
  }
10695
10713
 
@@ -10732,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
10732
10750
 
10733
10751
  lexed_comment = true;
10734
10752
  }
10735
- /* fallthrough */
10753
+ PRISM_FALLTHROUGH
10736
10754
  case '\r':
10737
10755
  case '\n': {
10738
10756
  parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10774,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
10774
10792
  parser->current.type = PM_TOKEN_NEWLINE;
10775
10793
  return;
10776
10794
  }
10777
- /* fallthrough */
10795
+ PRISM_FALLTHROUGH
10778
10796
  case PM_IGNORED_NEWLINE_ALL:
10779
10797
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10780
10798
  lexed_comment = false;
@@ -10871,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
10871
10889
 
10872
10890
  // ,
10873
10891
  case ',':
10892
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894
+ }
10895
+
10874
10896
  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10875
10897
  LEX(PM_TOKEN_COMMA);
10876
10898
 
@@ -11153,12 +11175,14 @@ parser_lex(pm_parser_t *parser) {
11153
11175
  lex_mode_push(parser, (pm_lex_mode_t) {
11154
11176
  .mode = PM_LEX_HEREDOC,
11155
11177
  .as.heredoc = {
11156
- .ident_start = ident_start,
11157
- .ident_length = ident_length,
11178
+ .base = {
11179
+ .ident_start = ident_start,
11180
+ .ident_length = ident_length,
11181
+ .quote = quote,
11182
+ .indent = indent
11183
+ },
11158
11184
  .next_start = parser->current.end,
11159
- .quote = quote,
11160
- .indent = indent,
11161
- .common_whitespace = (size_t) -1,
11185
+ .common_whitespace = NULL,
11162
11186
  .line_continuation = false
11163
11187
  }
11164
11188
  });
@@ -11171,7 +11195,7 @@ parser_lex(pm_parser_t *parser) {
11171
11195
  // this is not a valid heredoc declaration. In this case we
11172
11196
  // will add an error, but we will still return a heredoc
11173
11197
  // start.
11174
- if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
11198
+ if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11175
11199
  body_start = parser->end;
11176
11200
  } else {
11177
11201
  // Otherwise, we want to indicate that the body of the
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
11783
11807
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784
11808
  break;
11785
11809
  }
11786
- /* fallthrough */
11810
+ PRISM_FALLTHROUGH
11787
11811
  default:
11788
11812
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789
11813
  break;
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
11980
12004
  pm_token_buffer_push_byte(&token_buffer, '\r');
11981
12005
  break;
11982
12006
  }
11983
- /* fallthrough */
12007
+ PRISM_FALLTHROUGH
11984
12008
  case '\n':
11985
12009
  pm_token_buffer_push_byte(&token_buffer, '\n');
11986
12010
 
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
12084
12108
  pm_regexp_token_buffer_t token_buffer = { 0 };
12085
12109
 
12086
12110
  while (breakpoint != NULL) {
12111
+ uint8_t term = lex_mode->as.regexp.terminator;
12112
+ bool is_terminator = (*breakpoint == term);
12113
+
12114
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12115
+ // For example: `%r\nfoo\r\n`
12116
+ // The regular expression should be /foo/, not /foo\r/
12117
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118
+ if (term == '\n') {
12119
+ is_terminator = true;
12120
+ }
12121
+
12122
+ // If the terminator is a CR, but we see a CRLF, we need to
12123
+ // treat the CRLF as a newline, meaning this is _not_ the
12124
+ // terminator
12125
+ if (term == '\r') {
12126
+ is_terminator = false;
12127
+ }
12128
+ }
12129
+
12087
12130
  // If we hit the terminator, we need to determine what kind of
12088
12131
  // token to return.
12089
- if (*breakpoint == lex_mode->as.regexp.terminator) {
12132
+ if (is_terminator) {
12090
12133
  if (lex_mode->as.regexp.nesting > 0) {
12091
12134
  parser->current.end = breakpoint + 1;
12092
12135
  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
12148
12191
  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149
12192
  token_buffer.base.cursor = breakpoint;
12150
12193
 
12151
- /* fallthrough */
12194
+ PRISM_FALLTHROUGH
12152
12195
  case '\n':
12153
12196
  // If we've hit a newline, then we need to track that in
12154
12197
  // the list of newlines.
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
12190
12233
  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191
12234
  break;
12192
12235
  }
12193
- /* fallthrough */
12236
+ PRISM_FALLTHROUGH
12194
12237
  case '\n':
12195
12238
  if (parser->heredoc_end) {
12196
12239
  // ... if we are on the same line as a heredoc,
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
12316
12359
  continue;
12317
12360
  }
12318
12361
 
12362
+ uint8_t term = lex_mode->as.string.terminator;
12363
+ bool is_terminator = (*breakpoint == term);
12364
+
12365
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12366
+ // For example: `%\nfoo\r\n`
12367
+ // The string should be "foo", not "foo\r"
12368
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369
+ if (term == '\n') {
12370
+ is_terminator = true;
12371
+ }
12372
+
12373
+ // If the terminator is a CR, but we see a CRLF, we need to
12374
+ // treat the CRLF as a newline, meaning this is _not_ the
12375
+ // terminator
12376
+ if (term == '\r') {
12377
+ is_terminator = false;
12378
+ }
12379
+ }
12380
+
12319
12381
  // Note that we have to check the terminator here first because we could
12320
12382
  // potentially be parsing a % string that has a # character as the
12321
12383
  // terminator.
12322
- if (*breakpoint == lex_mode->as.string.terminator) {
12384
+ if (is_terminator) {
12323
12385
  // If this terminator doesn't actually close the string, then we need
12324
12386
  // to continue on past it.
12325
12387
  if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
12379
12441
  pm_token_buffer_escape(parser, &token_buffer);
12380
12442
  token_buffer.cursor = breakpoint;
12381
12443
 
12382
- /* fallthrough */
12444
+ PRISM_FALLTHROUGH
12383
12445
  case '\n':
12384
12446
  // When we hit a newline, we need to flush any potential
12385
12447
  // heredocs. Note that this has to happen after we check
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
12424
12486
  pm_token_buffer_push_byte(&token_buffer, '\r');
12425
12487
  break;
12426
12488
  }
12427
- /* fallthrough */
12489
+ PRISM_FALLTHROUGH
12428
12490
  case '\n':
12429
12491
  if (!lex_mode->as.string.interpolation) {
12430
12492
  pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12514,6 +12576,7 @@ parser_lex(pm_parser_t *parser) {
12514
12576
  // Now let's grab the information about the identifier off of the
12515
12577
  // current lex mode.
12516
12578
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12579
+ pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12517
12580
 
12518
12581
  bool line_continuation = lex_mode->as.heredoc.line_continuation;
12519
12582
  lex_mode->as.heredoc.line_continuation = false;
@@ -12523,15 +12586,16 @@ parser_lex(pm_parser_t *parser) {
12523
12586
  // terminator) but still continue parsing so that content after the
12524
12587
  // declaration of the heredoc can be parsed.
12525
12588
  if (parser->current.end >= parser->end) {
12526
- pm_parser_err_heredoc_term(parser, lex_mode);
12589
+ pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12527
12590
  parser->next_start = lex_mode->as.heredoc.next_start;
12528
12591
  parser->heredoc_end = parser->current.end;
12529
12592
  lex_state_set(parser, PM_LEX_STATE_END);
12593
+ lex_mode_pop(parser);
12530
12594
  LEX(PM_TOKEN_HEREDOC_END);
12531
12595
  }
12532
12596
 
12533
- const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
12534
- size_t ident_length = lex_mode->as.heredoc.ident_length;
12597
+ const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12598
+ size_t ident_length = heredoc_lex_mode->ident_length;
12535
12599
 
12536
12600
  // If we are immediately following a newline and we have hit the
12537
12601
  // terminator, then we need to return the ending of the heredoc.
@@ -12556,10 +12620,7 @@ parser_lex(pm_parser_t *parser) {
12556
12620
  const uint8_t *terminator_start = ident_end - ident_length;
12557
12621
  const uint8_t *cursor = start;
12558
12622
 
12559
- if (
12560
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
12561
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
12562
- ) {
12623
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12563
12624
  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12564
12625
  cursor++;
12565
12626
  }
@@ -12582,17 +12643,19 @@ parser_lex(pm_parser_t *parser) {
12582
12643
  }
12583
12644
 
12584
12645
  lex_state_set(parser, PM_LEX_STATE_END);
12646
+ lex_mode_pop(parser);
12585
12647
  LEX(PM_TOKEN_HEREDOC_END);
12586
12648
  }
12587
12649
  }
12588
12650
 
12589
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
12651
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12590
12652
  if (
12591
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
12592
- (lex_mode->as.heredoc.common_whitespace > whitespace) &&
12653
+ heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12654
+ lex_mode->as.heredoc.common_whitespace != NULL &&
12655
+ (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12593
12656
  peek_at(parser, start) != '\n'
12594
12657
  ) {
12595
- lex_mode->as.heredoc.common_whitespace = whitespace;
12658
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
12596
12659
  }
12597
12660
  }
12598
12661
 
@@ -12601,7 +12664,7 @@ parser_lex(pm_parser_t *parser) {
12601
12664
  // strpbrk to find the first of these characters.
12602
12665
  uint8_t breakpoints[] = "\r\n\\#";
12603
12666
 
12604
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
12667
+ pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12605
12668
  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12606
12669
  breakpoints[3] = '\0';
12607
12670
  }
@@ -12631,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
12631
12694
  pm_token_buffer_escape(parser, &token_buffer);
12632
12695
  token_buffer.cursor = breakpoint;
12633
12696
 
12634
- /* fallthrough */
12697
+ PRISM_FALLTHROUGH
12635
12698
  case '\n': {
12636
12699
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12637
12700
  parser_flush_heredoc_end(parser);
@@ -12664,8 +12727,7 @@ parser_lex(pm_parser_t *parser) {
12664
12727
  // leading whitespace if we have a - or ~ heredoc.
12665
12728
  const uint8_t *cursor = start;
12666
12729
 
12667
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
12668
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12730
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12669
12731
  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12670
12732
  cursor++;
12671
12733
  }
@@ -12681,16 +12743,16 @@ parser_lex(pm_parser_t *parser) {
12681
12743
  }
12682
12744
  }
12683
12745
 
12684
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
12746
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12685
12747
 
12686
12748
  // If we have hit a newline that is followed by a valid
12687
12749
  // terminator, then we need to return the content of the
12688
12750
  // heredoc here as string content. Then, the next time a
12689
12751
  // token is lexed, it will match again and return the
12690
12752
  // end of the heredoc.
12691
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12692
- if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12693
- lex_mode->as.heredoc.common_whitespace = whitespace;
12753
+ if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12754
+ if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12755
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
12694
12756
  }
12695
12757
 
12696
12758
  parser->current.end = breakpoint + 1;
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
12732
12794
  pm_token_buffer_push_byte(&token_buffer, '\r');
12733
12795
  break;
12734
12796
  }
12735
- /* fallthrough */
12797
+ PRISM_FALLTHROUGH
12736
12798
  case '\n':
12737
12799
  pm_token_buffer_push_byte(&token_buffer, '\\');
12738
12800
  pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,12 +12814,12 @@ parser_lex(pm_parser_t *parser) {
12752
12814
  pm_token_buffer_push_byte(&token_buffer, '\r');
12753
12815
  break;
12754
12816
  }
12755
- /* fallthrough */
12817
+ PRISM_FALLTHROUGH
12756
12818
  case '\n':
12757
12819
  // If we are in a tilde here, we should
12758
12820
  // break out of the loop and return the
12759
12821
  // string content.
12760
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12822
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12761
12823
  const uint8_t *end = parser->current.end;
12762
12824
  pm_newline_list_append(&parser->newline_list, end);
12763
12825
 
@@ -12983,7 +13045,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12983
13045
  [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12984
13046
  [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12985
13047
  [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12986
- [PM_TOKEN_USTAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13048
+ [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12987
13049
 
12988
13050
  // -@
12989
13051
  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13044
13106
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045
13107
  }
13046
13108
 
13047
- /**
13048
- * Returns true if the current token is any of the six given types.
13049
- */
13050
- static inline bool
13051
- match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053
- }
13054
-
13055
13109
  /**
13056
13110
  * Returns true if the current token is any of the seven given types.
13057
13111
  */
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13068
13122
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069
13123
  }
13070
13124
 
13125
+ /**
13126
+ * Returns true if the current token is any of the nine given types.
13127
+ */
13128
+ static inline bool
13129
+ match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131
+ }
13132
+
13071
13133
  /**
13072
13134
  * If the current token is of the specified type, lex forward by one token and
13073
13135
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13096
13158
  return false;
13097
13159
  }
13098
13160
 
13099
- /**
13100
- * If the current token is any of the three given types, lex forward by one
13101
- * token and return true. Otherwise return false.
13102
- */
13103
- static inline bool
13104
- accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105
- if (match3(parser, type1, type2, type3)) {
13106
- parser_lex(parser);
13107
- return true;
13108
- }
13109
- return false;
13110
- }
13111
-
13112
13161
  /**
13113
13162
  * This function indicates that the parser expects a token in a specific
13114
13163
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,32 +13195,16 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
13146
13195
  parser->previous.type = PM_TOKEN_MISSING;
13147
13196
  }
13148
13197
 
13149
- /**
13150
- * This function is the same as expect2, but it expects one of three token types.
13151
- */
13152
- static void
13153
- expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154
- if (accept3(parser, type1, type2, type3)) return;
13155
-
13156
- const uint8_t *location = parser->previous.end;
13157
- pm_parser_err(parser, location, location, diag_id);
13158
-
13159
- parser->previous.start = location;
13160
- parser->previous.type = PM_TOKEN_MISSING;
13161
- }
13162
-
13163
13198
  /**
13164
13199
  * A special expect1 that expects a heredoc terminator and handles popping the
13165
13200
  * lex mode accordingly.
13166
13201
  */
13167
13202
  static void
13168
- expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
13203
+ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13169
13204
  if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13170
- lex_mode_pop(parser);
13171
13205
  parser_lex(parser);
13172
13206
  } else {
13173
- pm_parser_err_heredoc_term(parser, lex_mode);
13174
- lex_mode_pop(parser);
13207
+ pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13175
13208
  parser->previous.start = parser->previous.end;
13176
13209
  parser->previous.type = PM_TOKEN_MISSING;
13177
13210
  }
@@ -13503,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
13503
13536
  return (pm_node_t *) pm_index_target_node_create(parser, call);
13504
13537
  }
13505
13538
  }
13506
- /* fallthrough */
13539
+ PRISM_FALLTHROUGH
13507
13540
  default:
13508
13541
  // In this case we have a node that we don't know how to convert
13509
13542
  // into a target. We need to treat it as an error. For now, we'll
@@ -13585,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13585
13618
  case PM_BACK_REFERENCE_READ_NODE:
13586
13619
  case PM_NUMBERED_REFERENCE_READ_NODE:
13587
13620
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13588
- /* fallthrough */
13621
+ PRISM_FALLTHROUGH
13589
13622
  case PM_GLOBAL_VARIABLE_READ_NODE: {
13590
13623
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13591
13624
  pm_node_destroy(parser, target);
@@ -13712,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13712
13745
 
13713
13746
  // Replace the name with "[]=".
13714
13747
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
+
13749
+ // Ensure that the arguments for []= don't contain keywords
13750
+ pm_index_arguments_check(parser, call->arguments, call->block);
13715
13751
  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13716
13752
 
13717
13753
  return target;
@@ -13724,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13724
13760
  // is no way for us to attach it to the tree at this point.
13725
13761
  pm_node_destroy(parser, value);
13726
13762
  }
13727
- /* fallthrough */
13763
+ PRISM_FALLTHROUGH
13728
13764
  default:
13729
13765
  // In this case we have a node that we don't know how to convert into a
13730
13766
  // target. We need to treat it as an error. For now, we'll mark it as an
@@ -13797,6 +13833,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13797
13833
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13798
13834
  pm_multi_target_node_targets_append(parser, result, splat);
13799
13835
  has_rest = true;
13836
+ } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13837
+ context_push(parser, PM_CONTEXT_MULTI_TARGET);
13838
+ pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13839
+ target = parse_target(parser, target, true, false);
13840
+
13841
+ pm_multi_target_node_targets_append(parser, result, target);
13842
+ context_pop(parser);
13800
13843
  } else if (token_begins_expression_p(parser->current.type)) {
13801
13844
  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13802
13845
  target = parse_target(parser, target, true, false);
@@ -14108,8 +14151,8 @@ static void
14108
14151
  parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14109
14152
  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14110
14153
 
14111
- // First we need to check if the next token is one that could be the start of
14112
- // an argument. If it's not, then we can just return.
14154
+ // First we need to check if the next token is one that could be the start
14155
+ // of an argument. If it's not, then we can just return.
14113
14156
  if (
14114
14157
  match2(parser, terminator, PM_TOKEN_EOF) ||
14115
14158
  (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
@@ -14186,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14186
14229
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14187
14230
  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14188
14231
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14232
+ if (parsed_bare_hash) {
14233
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14234
+ }
14189
14235
  } else {
14190
14236
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14191
14237
 
@@ -14234,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14234
14280
  }
14235
14281
  }
14236
14282
  }
14237
- /* fallthrough */
14283
+ PRISM_FALLTHROUGH
14238
14284
  default: {
14239
14285
  if (argument == NULL) {
14240
14286
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14297,23 +14343,32 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14297
14343
  // If parsing the argument failed, we need to stop parsing arguments.
14298
14344
  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14299
14345
 
14300
- // If the terminator of these arguments is not EOF, then we have a specific
14301
- // token we're looking for. In that case we can accept a newline here
14302
- // because it is not functioning as a statement terminator.
14303
- if (terminator != PM_TOKEN_EOF) accept1(parser, PM_TOKEN_NEWLINE);
14346
+ // If the terminator of these arguments is not EOF, then we have a
14347
+ // specific token we're looking for. In that case we can accept a
14348
+ // newline here because it is not functioning as a statement terminator.
14349
+ bool accepted_newline = false;
14350
+ if (terminator != PM_TOKEN_EOF) {
14351
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14352
+ }
14304
14353
 
14305
14354
  if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14306
- // If we previously were on a comma and we just parsed a bare hash, then
14307
- // we want to continue parsing arguments. This is because the comma was
14308
- // grabbed up by the hash parser.
14355
+ // If we previously were on a comma and we just parsed a bare hash,
14356
+ // then we want to continue parsing arguments. This is because the
14357
+ // comma was grabbed up by the hash parser.
14358
+ } else if (accept1(parser, PM_TOKEN_COMMA)) {
14359
+ // If there was a comma, then we need to check if we also accepted a
14360
+ // newline. If we did, then this is a syntax error.
14361
+ if (accepted_newline) {
14362
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14363
+ }
14309
14364
  } else {
14310
- // If there is no comma at the end of the argument list then we're done
14311
- // parsing arguments and can break out of this loop.
14312
- if (!accept1(parser, PM_TOKEN_COMMA)) break;
14365
+ // If there is no comma at the end of the argument list then we're
14366
+ // done parsing arguments and can break out of this loop.
14367
+ break;
14313
14368
  }
14314
14369
 
14315
- // If we hit the terminator, then that means we have a trailing comma so we
14316
- // can accept that output as well.
14370
+ // If we hit the terminator, then that means we have a trailing comma so
14371
+ // we can accept that output as well.
14317
14372
  if (match1(parser, terminator)) break;
14318
14373
  }
14319
14374
  }
@@ -14468,15 +14523,17 @@ parse_parameters(
14468
14523
  bool allows_trailing_comma,
14469
14524
  bool allows_forwarding_parameters,
14470
14525
  bool accepts_blocks_in_defaults,
14526
+ bool in_block,
14471
14527
  uint16_t depth
14472
14528
  ) {
14473
- pm_parameters_node_t *params = pm_parameters_node_create(parser);
14474
- bool looping = true;
14475
-
14476
14529
  pm_do_loop_stack_push(parser, false);
14530
+
14531
+ pm_parameters_node_t *params = pm_parameters_node_create(parser);
14477
14532
  pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14478
14533
 
14479
- do {
14534
+ while (true) {
14535
+ bool parsing = true;
14536
+
14480
14537
  switch (parser->current.type) {
14481
14538
  case PM_TOKEN_PARENTHESIS_LEFT: {
14482
14539
  update_parameter_state(parser, &parser->current, &order);
@@ -14611,7 +14668,7 @@ parse_parameters(
14611
14668
  // then we can put a missing node in its place and stop parsing the
14612
14669
  // parameters entirely now.
14613
14670
  if (parser->recovering) {
14614
- looping = false;
14671
+ parsing = false;
14615
14672
  break;
14616
14673
  }
14617
14674
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
@@ -14631,7 +14688,7 @@ parse_parameters(
14631
14688
  break;
14632
14689
  }
14633
14690
  case PM_TOKEN_LABEL: {
14634
- if (!uses_parentheses) parser->in_keyword_arg = true;
14691
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14635
14692
  update_parameter_state(parser, &parser->current, &order);
14636
14693
 
14637
14694
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -14669,7 +14726,7 @@ parse_parameters(
14669
14726
  context_pop(parser);
14670
14727
 
14671
14728
  if (uses_parentheses) {
14672
- looping = false;
14729
+ parsing = false;
14673
14730
  break;
14674
14731
  }
14675
14732
 
@@ -14713,7 +14770,7 @@ parse_parameters(
14713
14770
  // then we can put a missing node in its place and stop parsing the
14714
14771
  // parameters entirely now.
14715
14772
  if (parser->recovering) {
14716
- looping = false;
14773
+ parsing = false;
14717
14774
  break;
14718
14775
  }
14719
14776
  }
@@ -14815,14 +14872,31 @@ parse_parameters(
14815
14872
  }
14816
14873
  }
14817
14874
 
14818
- looping = false;
14875
+ parsing = false;
14819
14876
  break;
14820
14877
  }
14821
14878
 
14822
- if (looping && uses_parentheses) {
14823
- accept1(parser, PM_TOKEN_NEWLINE);
14879
+ // If we hit some kind of issue while parsing the parameter, this would
14880
+ // have been set to false. In that case, we need to break out of the
14881
+ // loop.
14882
+ if (!parsing) break;
14883
+
14884
+ bool accepted_newline = false;
14885
+ if (uses_parentheses) {
14886
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14824
14887
  }
14825
- } while (looping && accept1(parser, PM_TOKEN_COMMA));
14888
+
14889
+ if (accept1(parser, PM_TOKEN_COMMA)) {
14890
+ // If there was a comma, but we also accepted a newline, then this
14891
+ // is a syntax error.
14892
+ if (accepted_newline) {
14893
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14894
+ }
14895
+ } else {
14896
+ // If there was no comma, then we're done parsing parameters.
14897
+ break;
14898
+ }
14899
+ }
14826
14900
 
14827
14901
  pm_do_loop_stack_pop(parser);
14828
14902
 
@@ -15083,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15083
15157
  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15084
15158
  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15085
15159
  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15086
- default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15160
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15087
15161
  }
15088
15162
 
15089
15163
  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15178,6 +15252,7 @@ parse_block_parameters(
15178
15252
  allows_trailing_comma,
15179
15253
  false,
15180
15254
  accepts_blocks_in_defaults,
15255
+ true,
15181
15256
  (uint16_t) (depth + 1)
15182
15257
  );
15183
15258
  }
@@ -15500,6 +15575,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
15500
15575
  case PM_CONTEXT_IF:
15501
15576
  case PM_CONTEXT_LOOP_PREDICATE:
15502
15577
  case PM_CONTEXT_MAIN:
15578
+ case PM_CONTEXT_MULTI_TARGET:
15503
15579
  case PM_CONTEXT_PARENS:
15504
15580
  case PM_CONTEXT_POSTEXE:
15505
15581
  case PM_CONTEXT_PREDICATE:
@@ -15628,6 +15704,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15628
15704
  case PM_CONTEXT_MODULE_ENSURE:
15629
15705
  case PM_CONTEXT_MODULE_RESCUE:
15630
15706
  case PM_CONTEXT_MODULE:
15707
+ case PM_CONTEXT_MULTI_TARGET:
15631
15708
  case PM_CONTEXT_PARENS:
15632
15709
  case PM_CONTEXT_PREDICATE:
15633
15710
  case PM_CONTEXT_RESCUE_MODIFIER:
@@ -16091,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
16091
16168
  case PM_TOKEN_TILDE:
16092
16169
  case PM_TOKEN_BANG:
16093
16170
  if (name->end[-1] == '@') return name->end - 1;
16094
- /* fallthrough */
16171
+ PRISM_FALLTHROUGH
16095
16172
  default:
16096
16173
  return name->end;
16097
16174
  }
@@ -16347,14 +16424,15 @@ static pm_node_t *
16347
16424
  parse_variable(pm_parser_t *parser) {
16348
16425
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16349
16426
  int depth;
16427
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16350
16428
 
16351
- if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16429
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16352
16430
  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16353
16431
  }
16354
16432
 
16355
16433
  pm_scope_t *current_scope = parser->current_scope;
16356
16434
  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16357
- if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16435
+ if (is_numbered_param) {
16358
16436
  // When you use a numbered parameter, it implies the existence of
16359
16437
  // all of the locals that exist before it. For example, referencing
16360
16438
  // _2 means that _1 must exist. Therefore here we loop through all
@@ -17045,7 +17123,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17045
17123
  parse_pattern_hash_key(parser, &keys, first_node);
17046
17124
  pm_node_t *value;
17047
17125
 
17048
- if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17126
+ if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17049
17127
  // Otherwise, we will create an implicit local variable
17050
17128
  // target for the value.
17051
17129
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
@@ -17062,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17062
17140
  break;
17063
17141
  }
17064
17142
  }
17065
- /* fallthrough */
17143
+ PRISM_FALLTHROUGH
17066
17144
  default: {
17067
17145
  // If we get anything else, then this is an error. For this we'll
17068
17146
  // create a missing node for the value and create an assoc node for
@@ -17082,7 +17160,12 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17082
17160
  // If there are any other assocs, then we'll parse them now.
17083
17161
  while (accept1(parser, PM_TOKEN_COMMA)) {
17084
17162
  // Here we need to break to support trailing commas.
17085
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17163
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17164
+ // Trailing commas are not allowed to follow a rest pattern.
17165
+ if (rest != NULL) {
17166
+ pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17167
+ }
17168
+
17086
17169
  break;
17087
17170
  }
17088
17171
 
@@ -17553,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17553
17636
  break;
17554
17637
  }
17555
17638
  }
17556
- /* fallthrough */
17639
+ PRISM_FALLTHROUGH
17557
17640
  default:
17558
17641
  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17559
17642
  break;
@@ -17575,9 +17658,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17575
17658
  // Gather up all of the patterns into the list.
17576
17659
  while (accept1(parser, PM_TOKEN_COMMA)) {
17577
17660
  // Break early here in case we have a trailing comma.
17578
- if (match4(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON)) {
17661
+ if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17579
17662
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17580
17663
  pm_node_list_append(&nodes, node);
17664
+ trailing_rest = true;
17581
17665
  break;
17582
17666
  }
17583
17667
 
@@ -17779,6 +17863,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17779
17863
  case PM_CONTEXT_LAMBDA_BRACES:
17780
17864
  case PM_CONTEXT_LAMBDA_DO_END:
17781
17865
  case PM_CONTEXT_LOOP_PREDICATE:
17866
+ case PM_CONTEXT_MULTI_TARGET:
17782
17867
  case PM_CONTEXT_PARENS:
17783
17868
  case PM_CONTEXT_POSTEXE:
17784
17869
  case PM_CONTEXT_PREDICATE:
@@ -17862,6 +17947,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17862
17947
  case PM_CONTEXT_LAMBDA_ENSURE:
17863
17948
  case PM_CONTEXT_LAMBDA_RESCUE:
17864
17949
  case PM_CONTEXT_LOOP_PREDICATE:
17950
+ case PM_CONTEXT_MULTI_TARGET:
17865
17951
  case PM_CONTEXT_PARENS:
17866
17952
  case PM_CONTEXT_POSTEXE:
17867
17953
  case PM_CONTEXT_PREDICATE:
@@ -17951,19 +18037,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17951
18037
  bool parsed_bare_hash = false;
17952
18038
 
17953
18039
  while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18040
+ bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18041
+
17954
18042
  // Handle the case where we don't have a comma and we have a
17955
18043
  // newline followed by a right bracket.
17956
- if (accept1(parser, PM_TOKEN_NEWLINE) && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18044
+ if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17957
18045
  break;
17958
18046
  }
17959
18047
 
17960
18048
  // Ensure that we have a comma between elements in the array.
17961
- if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17962
- const uint8_t *location = parser->previous.end;
17963
- PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18049
+ if (array->elements.size > 0) {
18050
+ if (accept1(parser, PM_TOKEN_COMMA)) {
18051
+ // If there was a comma but we also accepted a newline,
18052
+ // then this is a syntax error.
18053
+ if (accepted_newline) {
18054
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18055
+ }
18056
+ } else {
18057
+ // If there was no comma, then we need to add a syntax
18058
+ // error.
18059
+ const uint8_t *location = parser->previous.end;
18060
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17964
18061
 
17965
- parser->previous.start = location;
17966
- parser->previous.type = PM_TOKEN_MISSING;
18062
+ parser->previous.start = location;
18063
+ parser->previous.type = PM_TOKEN_MISSING;
18064
+ }
17967
18065
  }
17968
18066
 
17969
18067
  // If we have a right bracket immediately following a comma,
@@ -18119,14 +18217,32 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18119
18217
  multi_target->base.location.start = lparen_loc.start;
18120
18218
  multi_target->base.location.end = rparen_loc.end;
18121
18219
 
18122
- if (match1(parser, PM_TOKEN_COMMA)) {
18123
- if (binding_power == PM_BINDING_POWER_STATEMENT) {
18124
- return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18125
- }
18126
- return (pm_node_t *) multi_target;
18220
+ pm_node_t *result;
18221
+ if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18222
+ result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18223
+ accept1(parser, PM_TOKEN_NEWLINE);
18224
+ } else {
18225
+ result = (pm_node_t *) multi_target;
18127
18226
  }
18128
18227
 
18129
- return parse_target_validate(parser, (pm_node_t *) multi_target, false);
18228
+ if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18229
+ // All set, this is explicitly allowed by the parent
18230
+ // context.
18231
+ } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18232
+ // All set, we're inside a for loop and we're parsing
18233
+ // multiple targets.
18234
+ } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18235
+ // Multi targets are not allowed when we are not at the
18236
+ // statement level.
18237
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18238
+ } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18239
+ // Multi targets must be followed by an equal sign in
18240
+ // order to be valid (or a right parenthesis if they are
18241
+ // nested).
18242
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18243
+ }
18244
+
18245
+ return result;
18130
18246
  }
18131
18247
 
18132
18248
  // If we have a single statement and are ending on a right parenthesis
@@ -18187,6 +18303,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18187
18303
  pm_accepts_block_stack_pop(parser);
18188
18304
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18189
18305
 
18306
+ // When we're parsing multi targets, we allow them to be followed by
18307
+ // a right parenthesis if they are at the statement level. This is
18308
+ // only possible if they are the final statement inside parentheses.
18309
+ // We need to explicitly reject that here.
18310
+ {
18311
+ pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18312
+
18313
+ if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18314
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18315
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
18316
+
18317
+ statement = (pm_node_t *) multi_target;
18318
+ statements->body.nodes[statements->body.size - 1] = statement;
18319
+ }
18320
+
18321
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18322
+ const uint8_t *offset = statement->location.end;
18323
+ pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18324
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18325
+
18326
+ statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18327
+ statements->body.nodes[statements->body.size - 1] = statement;
18328
+
18329
+ pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18330
+ }
18331
+ }
18332
+
18190
18333
  pop_block_exits(parser, previous_block_exits);
18191
18334
  pm_node_list_free(&current_block_exits);
18192
18335
 
@@ -18442,10 +18585,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18442
18585
  case PM_TOKEN_HEREDOC_START: {
18443
18586
  // Here we have found a heredoc. We'll parse it and add it to the
18444
18587
  // list of strings.
18445
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
18446
- assert(lex_mode->mode == PM_LEX_HEREDOC);
18447
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
18448
- pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
18588
+ assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18589
+ pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18590
+
18591
+ size_t common_whitespace = (size_t) -1;
18592
+ parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18449
18593
 
18450
18594
  parser_lex(parser);
18451
18595
  pm_token_t opening = parser->previous;
@@ -18456,10 +18600,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18456
18600
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18457
18601
  // If we get here, then we have an empty heredoc. We'll create
18458
18602
  // an empty content token and return an empty string node.
18459
- expect1_heredoc_term(parser, lex_mode);
18603
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18460
18604
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
18461
18605
 
18462
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18606
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18463
18607
  node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18464
18608
  } else {
18465
18609
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
@@ -18486,18 +18630,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18486
18630
  cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18487
18631
  cast->base.location = cast->opening_loc;
18488
18632
 
18489
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18633
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18490
18634
  assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18491
18635
  cast->base.type = PM_X_STRING_NODE;
18492
18636
  }
18493
18637
 
18494
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
18495
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18638
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18496
18639
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18497
18640
  }
18498
18641
 
18499
18642
  node = (pm_node_t *) cast;
18500
- expect1_heredoc_term(parser, lex_mode);
18643
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18501
18644
  } else {
18502
18645
  // If we get here, then we have multiple parts in the heredoc,
18503
18646
  // so we'll need to create an interpolated string node to hold
@@ -18511,15 +18654,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18511
18654
  }
18512
18655
  }
18513
18656
 
18514
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
18515
-
18516
18657
  // Now that we have all of the parts, create the correct type of
18517
18658
  // interpolated node.
18518
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18659
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18519
18660
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18520
18661
  cast->parts = parts;
18521
18662
 
18522
- expect1_heredoc_term(parser, lex_mode);
18663
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18523
18664
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18524
18665
 
18525
18666
  cast->base.location = cast->opening_loc;
@@ -18528,7 +18669,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18528
18669
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18529
18670
  pm_node_list_free(&parts);
18530
18671
 
18531
- expect1_heredoc_term(parser, lex_mode);
18672
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18532
18673
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
18533
18674
 
18534
18675
  cast->base.location = cast->opening_loc;
@@ -18537,9 +18678,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18537
18678
 
18538
18679
  // If this is a heredoc that is indented with a ~, then we need
18539
18680
  // to dedent each line by the common leading whitespace.
18540
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18681
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18541
18682
  pm_node_list_t *nodes;
18542
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18683
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18543
18684
  nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18544
18685
  } else {
18545
18686
  nodes = &((pm_interpolated_string_node_t *) node)->parts;
@@ -18625,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18625
18766
  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18626
18767
  }
18627
18768
  }
18628
- /* fallthrough */
18769
+ PRISM_FALLTHROUGH
18629
18770
  default:
18630
18771
  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18631
18772
  }
@@ -19116,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19116
19257
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19117
19258
  parser_lex(parser);
19118
19259
 
19260
+ // This will be false if the method name is not a valid identifier
19261
+ // but could be followed by an operator.
19262
+ bool valid_name = true;
19263
+
19119
19264
  switch (parser->current.type) {
19120
19265
  case PM_CASE_OPERATOR:
19121
19266
  pm_parser_scope_push(parser, true);
@@ -19145,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19145
19290
 
19146
19291
  break;
19147
19292
  }
19148
- case PM_TOKEN_CONSTANT:
19149
19293
  case PM_TOKEN_INSTANCE_VARIABLE:
19150
19294
  case PM_TOKEN_CLASS_VARIABLE:
19151
19295
  case PM_TOKEN_GLOBAL_VARIABLE:
19296
+ valid_name = false;
19297
+ PRISM_FALLTHROUGH
19298
+ case PM_TOKEN_CONSTANT:
19152
19299
  case PM_TOKEN_KEYWORD_NIL:
19153
19300
  case PM_TOKEN_KEYWORD_SELF:
19154
19301
  case PM_TOKEN_KEYWORD_TRUE:
@@ -19206,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19206
19353
 
19207
19354
  name = parse_method_definition_name(parser);
19208
19355
  } else {
19356
+ if (!valid_name) {
19357
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19358
+ }
19359
+
19209
19360
  name = identifier;
19210
19361
  }
19211
19362
  break;
@@ -19256,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19256
19407
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19257
19408
  params = NULL;
19258
19409
  } else {
19259
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19410
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19260
19411
  }
19261
19412
 
19262
19413
  lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19281,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19281
19432
 
19282
19433
  lparen = not_provided(parser);
19283
19434
  rparen = not_provided(parser);
19284
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19435
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19285
19436
 
19286
19437
  context_pop(parser);
19287
19438
  break;
@@ -19690,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19690
19841
  pm_do_loop_stack_pop(parser);
19691
19842
  context_pop(parser);
19692
19843
 
19693
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19694
- pm_statements_node_t *statements = NULL;
19844
+ pm_token_t do_keyword;
19845
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19846
+ do_keyword = parser->previous;
19847
+ } else {
19848
+ do_keyword = not_provided(parser);
19849
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19850
+ }
19695
19851
 
19852
+ pm_statements_node_t *statements = NULL;
19696
19853
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19697
19854
  pm_accepts_block_stack_push(parser, true);
19698
19855
  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19703,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19703
19860
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19704
19861
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19705
19862
 
19706
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19863
+ return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19707
19864
  }
19708
19865
  case PM_TOKEN_KEYWORD_WHILE: {
19709
19866
  size_t opening_newline_index = token_newline_index(parser);
@@ -19718,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19718
19875
  pm_do_loop_stack_pop(parser);
19719
19876
  context_pop(parser);
19720
19877
 
19721
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19722
- pm_statements_node_t *statements = NULL;
19878
+ pm_token_t do_keyword;
19879
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19880
+ do_keyword = parser->previous;
19881
+ } else {
19882
+ do_keyword = not_provided(parser);
19883
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19884
+ }
19723
19885
 
19886
+ pm_statements_node_t *statements = NULL;
19724
19887
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19725
19888
  pm_accepts_block_stack_push(parser, true);
19726
19889
  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19731,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19731
19894
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19732
19895
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19733
19896
 
19734
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19897
+ return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19735
19898
  }
19736
19899
  case PM_TOKEN_PERCENT_LOWER_I: {
19737
19900
  parser_lex(parser);
@@ -20801,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20801
20964
  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20802
20965
  }
20803
20966
  }
20804
- /* fallthrough */
20967
+ PRISM_FALLTHROUGH
20805
20968
  case PM_CASE_WRITABLE: {
20806
20969
  parser_lex(parser);
20807
20970
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20847,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20847
21010
  case PM_BACK_REFERENCE_READ_NODE:
20848
21011
  case PM_NUMBERED_REFERENCE_READ_NODE:
20849
21012
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20850
- /* fallthrough */
21013
+ PRISM_FALLTHROUGH
20851
21014
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20852
21015
  parser_lex(parser);
20853
21016
 
@@ -20965,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20965
21128
  case PM_BACK_REFERENCE_READ_NODE:
20966
21129
  case PM_NUMBERED_REFERENCE_READ_NODE:
20967
21130
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20968
- /* fallthrough */
21131
+ PRISM_FALLTHROUGH
20969
21132
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20970
21133
  parser_lex(parser);
20971
21134
 
@@ -21093,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21093
21256
  case PM_BACK_REFERENCE_READ_NODE:
21094
21257
  case PM_NUMBERED_REFERENCE_READ_NODE:
21095
21258
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21096
- /* fallthrough */
21259
+ PRISM_FALLTHROUGH
21097
21260
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21098
21261
  parser_lex(parser);
21099
21262
 
@@ -21303,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21303
21466
  case PM_TOKEN_STAR:
21304
21467
  case PM_TOKEN_STAR_STAR: {
21305
21468
  parser_lex(parser);
21469
+ pm_token_t operator = parser->previous;
21470
+ switch (PM_NODE_TYPE(node)) {
21471
+ case PM_RESCUE_MODIFIER_NODE: {
21472
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21473
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21474
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21475
+ }
21476
+ break;
21477
+ }
21478
+ case PM_AND_NODE: {
21479
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21480
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21481
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21482
+ }
21483
+ break;
21484
+ }
21485
+ case PM_OR_NODE: {
21486
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21487
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21488
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21489
+ }
21490
+ break;
21491
+ }
21492
+ default:
21493
+ break;
21494
+ }
21495
+
21306
21496
  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21307
21497
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21308
21498
  }
@@ -21330,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21330
21520
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21331
21521
  }
21332
21522
 
21523
+ switch (PM_NODE_TYPE(node)) {
21524
+ case PM_RESCUE_MODIFIER_NODE: {
21525
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21526
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21527
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21528
+ }
21529
+ break;
21530
+ }
21531
+ case PM_AND_NODE: {
21532
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21533
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21534
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21535
+ }
21536
+ break;
21537
+ }
21538
+ case PM_OR_NODE: {
21539
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21540
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21541
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21542
+ }
21543
+ break;
21544
+ }
21545
+ default:
21546
+ break;
21547
+ }
21548
+
21333
21549
  pm_token_t message;
21334
21550
 
21335
21551
  switch (parser->current.type) {
@@ -21677,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21677
21893
  if (pm_symbol_node_label_p(node)) {
21678
21894
  return node;
21679
21895
  }
21896
+ break;
21680
21897
  default:
21681
21898
  break;
21682
21899
  }
@@ -21684,8 +21901,11 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21684
21901
  // Otherwise we'll look and see if the next token can be parsed as an infix
21685
21902
  // operator. If it can, then we'll parse it using parse_expression_infix.
21686
21903
  pm_binding_powers_t current_binding_powers;
21904
+ pm_token_type_t current_token_type;
21905
+
21687
21906
  while (
21688
- current_binding_powers = pm_binding_powers[parser->current.type],
21907
+ current_token_type = parser->current.type,
21908
+ current_binding_powers = pm_binding_powers[current_token_type],
21689
21909
  binding_power <= current_binding_powers.left &&
21690
21910
  current_binding_powers.binary
21691
21911
  ) {
@@ -21726,6 +21946,13 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21726
21946
  // If the operator is nonassoc and we should not be able to parse the
21727
21947
  // upcoming infix operator, break.
21728
21948
  if (current_binding_powers.nonassoc) {
21949
+ // If this is a non-assoc operator and we are about to parse the
21950
+ // exact same operator, then we need to add an error.
21951
+ if (match1(parser, current_token_type)) {
21952
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21953
+ break;
21954
+ }
21955
+
21729
21956
  // If this is an endless range, then we need to reject a couple of
21730
21957
  // additional operators because it violates the normal operator
21731
21958
  // precedence rules. Those patterns are:
@@ -21735,7 +21962,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21735
21962
  //
21736
21963
  if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21737
21964
  if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21738
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(parser->previous.type));
21965
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21739
21966
  break;
21740
21967
  }
21741
21968
 
@@ -21857,6 +22084,7 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21857
22084
  ));
21858
22085
 
21859
22086
  pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22087
+ pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21860
22088
  }
21861
22089
 
21862
22090
  pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
@@ -22535,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
22535
22763
  }
22536
22764
 
22537
22765
  #endif
22766
+
22767
+ /******************************************************************************/
22768
+ /* Slice queries for the Ruby API */
22769
+ /******************************************************************************/
22770
+
22771
/**
 * Classification produced by pm_slice_type for a slice of source bytes.
 */
typedef enum {
    /** The encoding name that was passed in could not be resolved. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not belong to any of the categories below. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22788
+
22789
+ /**
22790
+ * Check that the slice is a valid local variable name or constant.
22791
+ */
22792
+ pm_slice_type_t
22793
+ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22794
+ // first, get the right encoding object
22795
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22796
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22797
+
22798
+ // check that there is at least one character
22799
+ if (length == 0) return PM_SLICE_TYPE_NONE;
22800
+
22801
+ size_t width;
22802
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22803
+ // valid because alphabetical
22804
+ } else if (*source == '_') {
22805
+ // valid because underscore
22806
+ width = 1;
22807
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22808
+ // valid because multibyte
22809
+ } else {
22810
+ // invalid because no match
22811
+ return PM_SLICE_TYPE_NONE;
22812
+ }
22813
+
22814
+ // determine the type of the slice based on the first character
22815
+ const uint8_t *end = source + length;
22816
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22817
+
22818
+ // next, iterate through all of the bytes of the string to ensure that they
22819
+ // are all valid identifier characters
22820
+ source += width;
22821
+
22822
+ while (source < end) {
22823
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
22824
+ // valid because alphanumeric
22825
+ source += width;
22826
+ } else if (*source == '_') {
22827
+ // valid because underscore
22828
+ source++;
22829
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22830
+ // valid because multibyte
22831
+ source += width;
22832
+ } else {
22833
+ // invalid because no match
22834
+ break;
22835
+ }
22836
+ }
22837
+
22838
+ // accept a ! or ? at the end of the slice as a method name
22839
+ if (*source == '!' || *source == '?' || *source == '=') {
22840
+ source++;
22841
+ result = PM_SLICE_TYPE_METHOD_NAME;
22842
+ }
22843
+
22844
+ // valid if we are at the end of the slice
22845
+ return source == end ? result : PM_SLICE_TYPE_NONE;
22846
+ }
22847
+
22848
+ /**
22849
+ * Check that the slice is a valid local variable name.
22850
+ */
22851
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22852
+ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22853
+ switch (pm_slice_type(source, length, encoding_name)) {
22854
+ case PM_SLICE_TYPE_ERROR:
22855
+ return PM_STRING_QUERY_ERROR;
22856
+ case PM_SLICE_TYPE_NONE:
22857
+ case PM_SLICE_TYPE_CONSTANT:
22858
+ case PM_SLICE_TYPE_METHOD_NAME:
22859
+ return PM_STRING_QUERY_FALSE;
22860
+ case PM_SLICE_TYPE_LOCAL:
22861
+ return PM_STRING_QUERY_TRUE;
22862
+ }
22863
+
22864
+ assert(false && "unreachable");
22865
+ return PM_STRING_QUERY_FALSE;
22866
+ }
22867
+
22868
+ /**
22869
+ * Check that the slice is a valid constant name.
22870
+ */
22871
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22872
+ pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22873
+ switch (pm_slice_type(source, length, encoding_name)) {
22874
+ case PM_SLICE_TYPE_ERROR:
22875
+ return PM_STRING_QUERY_ERROR;
22876
+ case PM_SLICE_TYPE_NONE:
22877
+ case PM_SLICE_TYPE_LOCAL:
22878
+ case PM_SLICE_TYPE_METHOD_NAME:
22879
+ return PM_STRING_QUERY_FALSE;
22880
+ case PM_SLICE_TYPE_CONSTANT:
22881
+ return PM_STRING_QUERY_TRUE;
22882
+ }
22883
+
22884
+ assert(false && "unreachable");
22885
+ return PM_STRING_QUERY_FALSE;
22886
+ }
22887
+
22888
+ /**
22889
+ * Check that the slice is a valid method name.
22890
+ */
22891
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22892
+ pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22893
+ #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22894
+ #define C1(c) (*source == c)
22895
+ #define C2(s) (memcmp(source, s, 2) == 0)
22896
+ #define C3(s) (memcmp(source, s, 3) == 0)
22897
+
22898
+ switch (pm_slice_type(source, length, encoding_name)) {
22899
+ case PM_SLICE_TYPE_ERROR:
22900
+ return PM_STRING_QUERY_ERROR;
22901
+ case PM_SLICE_TYPE_NONE:
22902
+ break;
22903
+ case PM_SLICE_TYPE_LOCAL:
22904
+ // numbered parameters are not valid method names
22905
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22906
+ case PM_SLICE_TYPE_CONSTANT:
22907
+ // all constants are valid method names
22908
+ case PM_SLICE_TYPE_METHOD_NAME:
22909
+ // all method names are valid method names
22910
+ return PM_STRING_QUERY_TRUE;
22911
+ }
22912
+
22913
+ switch (length) {
22914
+ case 1:
22915
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22916
+ case 2:
22917
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22918
+ case 3:
22919
+ return B(C3("===") || C3("<=>") || C3("[]="));
22920
+ default:
22921
+ return PM_STRING_QUERY_FALSE;
22922
+ }
22923
+
22924
+ #undef B
22925
+ #undef C1
22926
+ #undef C2
22927
+ #undef C3
22928
+ }