prism 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -1
  3. data/Makefile +1 -1
  4. data/config.yml +422 -3
  5. data/docs/build_system.md +8 -11
  6. data/docs/relocation.md +34 -0
  7. data/ext/prism/api_node.c +18 -10
  8. data/ext/prism/extconf.rb +13 -36
  9. data/ext/prism/extension.c +68 -0
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +427 -3
  12. data/include/prism/defines.h +22 -7
  13. data/include/prism/diagnostic.h +1 -0
  14. data/include/prism/parser.h +25 -12
  15. data/include/prism/version.h +2 -2
  16. data/include/prism.h +47 -0
  17. data/lib/prism/dot_visitor.rb +10 -0
  18. data/lib/prism/dsl.rb +4 -4
  19. data/lib/prism/ffi.rb +49 -2
  20. data/lib/prism/inspect_visitor.rb +2 -0
  21. data/lib/prism/node.rb +1839 -96
  22. data/lib/prism/parse_result/errors.rb +1 -1
  23. data/lib/prism/parse_result.rb +140 -3
  24. data/lib/prism/reflection.rb +2 -2
  25. data/lib/prism/relocation.rb +504 -0
  26. data/lib/prism/serialize.rb +17 -5
  27. data/lib/prism/string_query.rb +30 -0
  28. data/lib/prism/translation/parser/compiler.rb +36 -26
  29. data/lib/prism/translation/parser.rb +3 -3
  30. data/lib/prism/translation/ripper.rb +1 -5
  31. data/lib/prism/translation/ruby_parser.rb +14 -5
  32. data/lib/prism.rb +6 -4
  33. data/prism.gemspec +7 -1
  34. data/rbi/prism/dsl.rbi +4 -4
  35. data/rbi/prism/node.rbi +5118 -1030
  36. data/rbi/prism/parse_result.rbi +29 -0
  37. data/rbi/prism/string_query.rbi +12 -0
  38. data/rbi/prism.rbi +34 -34
  39. data/sig/prism/dsl.rbs +2 -2
  40. data/sig/prism/node.rbs +13 -98
  41. data/sig/prism/parse_result.rbs +20 -0
  42. data/sig/prism/relocation.rbs +185 -0
  43. data/sig/prism/string_query.rbs +11 -0
  44. data/src/diagnostic.c +3 -1
  45. data/src/node.c +18 -0
  46. data/src/prettyprint.c +32 -0
  47. data/src/prism.c +586 -195
  48. data/src/regexp.c +7 -3
  49. data/src/serialize.c +12 -0
  50. data/src/static_literals.c +1 -1
  51. data/src/util/pm_char.c +1 -1
  52. data/src/util/pm_string.c +1 -0
  53. metadata +9 -3
data/src/prism.c CHANGED
@@ -544,10 +544,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
544
544
  * token.
545
545
  */
546
546
  static void
547
- pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
548
- const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
549
- size_t ident_length = lex_mode->as.heredoc.ident_length;
550
-
547
+ pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
551
548
  PM_PARSER_ERR_FORMAT(
552
549
  parser,
553
550
  ident_start,
@@ -964,7 +961,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
964
961
  if (local->name != PM_CONSTANT_ID_UNSET) {
965
962
  pm_constant_id_list_insert(list, (size_t) local->index, local->name);
966
963
 
967
- if (warn_unused && local->reads == 0) {
964
+ if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
968
965
  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
969
966
 
970
967
  if (constant->length >= 1 && *constant->start != '_') {
@@ -2110,14 +2107,6 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2110
2107
  return node;
2111
2108
  }
2112
2109
 
2113
- /**
2114
- * Return the size of the given array node.
2115
- */
2116
- static inline size_t
2117
- pm_array_node_size(pm_array_node_t *node) {
2118
- return node->elements.size;
2119
- }
2120
-
2121
2110
  /**
2122
2111
  * Append an argument to an array node.
2123
2112
  */
@@ -4153,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4153
4142
 
4154
4143
  // If errno is set, then it should only be ERANGE. At this point we need to
4155
4144
  // check if it's infinity (it should be).
4156
- if (errno == ERANGE && isinf(value)) {
4145
+ if (errno == ERANGE && PRISM_ISINF(value)) {
4157
4146
  int warn_width;
4158
4147
  const char *ellipsis;
4159
4148
 
@@ -7695,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
7695
7684
  * Allocate a new UntilNode node.
7696
7685
  */
7697
7686
  static pm_until_node_t *
7698
- pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7687
+ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7699
7688
  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7700
7689
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7701
7690
 
@@ -7710,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7710
7699
  },
7711
7700
  },
7712
7701
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7713
7703
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7714
7704
  .predicate = predicate,
7715
7705
  .statements = statements
@@ -7738,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7738
7728
  },
7739
7729
  },
7740
7730
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7741
7732
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7742
7733
  .predicate = predicate,
7743
7734
  .statements = statements
@@ -7805,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
7805
7796
  * Allocate a new WhileNode node.
7806
7797
  */
7807
7798
  static pm_while_node_t *
7808
- pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7799
+ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7809
7800
  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7810
7801
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7811
7802
 
@@ -7820,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7820
7811
  },
7821
7812
  },
7822
7813
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7823
7815
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7824
7816
  .predicate = predicate,
7825
7817
  .statements = statements
@@ -7848,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7848
7840
  },
7849
7841
  },
7850
7842
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7851
7844
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7852
7845
  .predicate = predicate,
7853
7846
  .statements = statements
@@ -7870,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7870
7863
  .location = PM_LOCATION_NULL_VALUE(parser)
7871
7864
  },
7872
7865
  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7873
7867
  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7874
7868
  .predicate = predicate,
7875
7869
  .statements = statements
@@ -8573,6 +8567,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
8573
8567
  case PM_CONTEXT_MAIN:
8574
8568
  case PM_CONTEXT_DEF_PARAMS:
8575
8569
  case PM_CONTEXT_DEFINED:
8570
+ case PM_CONTEXT_MULTI_TARGET:
8576
8571
  case PM_CONTEXT_TERNARY:
8577
8572
  case PM_CONTEXT_RESCUE_MODIFIER:
8578
8573
  return token->type == PM_TOKEN_EOF;
@@ -8777,6 +8772,7 @@ context_human(pm_context_t context) {
8777
8772
  case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8778
8773
  case PM_CONTEXT_MAIN: return "top level context";
8779
8774
  case PM_CONTEXT_MODULE: return "module definition";
8775
+ case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8780
8776
  case PM_CONTEXT_PARENS: return "parentheses";
8781
8777
  case PM_CONTEXT_POSTEXE: return "'END' block";
8782
8778
  case PM_CONTEXT_PREDICATE: return "predicate";
@@ -9051,6 +9047,10 @@ lex_global_variable(pm_parser_t *parser) {
9051
9047
  return PM_TOKEN_GLOBAL_VARIABLE;
9052
9048
  }
9053
9049
 
9050
+ // True if multiple characters are allowed after the declaration of the
9051
+ // global variable. Not true when it starts with "$-".
9052
+ bool allow_multiple = true;
9053
+
9054
9054
  switch (*parser->current.end) {
9055
9055
  case '~': // $~: match-data
9056
9056
  case '*': // $*: argv
@@ -9109,14 +9109,15 @@ lex_global_variable(pm_parser_t *parser) {
9109
9109
 
9110
9110
  case '-':
9111
9111
  parser->current.end++;
9112
- /* fallthrough */
9112
+ allow_multiple = false;
9113
+ PRISM_FALLTHROUGH
9113
9114
  default: {
9114
9115
  size_t width;
9115
9116
 
9116
9117
  if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9117
9118
  do {
9118
9119
  parser->current.end += width;
9119
- } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9120
+ } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9120
9121
  } else if (pm_char_is_whitespace(peek(parser))) {
9121
9122
  // If we get here, then we have a $ followed by whitespace,
9122
9123
  // which is not allowed.
@@ -9881,6 +9882,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9881
9882
  }
9882
9883
  case 'c': {
9883
9884
  parser->current.end++;
9885
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
9886
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9887
+ }
9888
+
9884
9889
  if (parser->current.end == parser->end) {
9885
9890
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9886
9891
  return;
@@ -9894,10 +9899,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9894
9899
  return;
9895
9900
  }
9896
9901
  case '\\':
9897
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
9898
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9899
- return;
9900
- }
9901
9902
  parser->current.end++;
9902
9903
 
9903
9904
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -9931,6 +9932,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9931
9932
  }
9932
9933
  case 'C': {
9933
9934
  parser->current.end++;
9935
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
9936
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9937
+ }
9938
+
9934
9939
  if (peek(parser) != '-') {
9935
9940
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9936
9941
  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
@@ -9951,10 +9956,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9951
9956
  return;
9952
9957
  }
9953
9958
  case '\\':
9954
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
9955
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9956
- return;
9957
- }
9958
9959
  parser->current.end++;
9959
9960
 
9960
9961
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -9989,6 +9990,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9989
9990
  }
9990
9991
  case 'M': {
9991
9992
  parser->current.end++;
9993
+ if (flags & PM_ESCAPE_FLAG_META) {
9994
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9995
+ }
9996
+
9992
9997
  if (peek(parser) != '-') {
9993
9998
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9994
9999
  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
@@ -10004,10 +10009,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10004
10009
  uint8_t peeked = peek(parser);
10005
10010
  switch (peeked) {
10006
10011
  case '\\':
10007
- if (flags & PM_ESCAPE_FLAG_META) {
10008
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10009
- return;
10010
- }
10011
10012
  parser->current.end++;
10012
10013
 
10013
10014
  if (match(parser, 'u') || match(parser, 'U')) {
@@ -10045,11 +10046,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10045
10046
  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10046
10047
  return;
10047
10048
  }
10049
+ PRISM_FALLTHROUGH
10048
10050
  }
10049
- /* fallthrough */
10050
10051
  default: {
10051
10052
  if (parser->current.end < parser->end) {
10052
10053
  escape_write_escape_encoded(parser, buffer);
10054
+ } else {
10055
+ pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10053
10056
  }
10054
10057
  return;
10055
10058
  }
@@ -10498,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10498
10501
  }
10499
10502
 
10500
10503
  const uint8_t *end = parser->current.end - 1;
10504
+ assert(end >= start);
10501
10505
  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10502
10506
 
10503
10507
  token_buffer->cursor = end;
@@ -10578,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
10578
10582
  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10579
10583
  }
10580
10584
 
10581
- const uint8_t delimiter = *parser->current.end;
10582
- parser->current.end += eol_length;
10585
+ uint8_t delimiter = *parser->current.end;
10583
10586
 
10587
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
10588
+ // For example, %\r\nfoo\r\n should be "foo"
10589
+ if (eol_length == 2) {
10590
+ delimiter = *(parser->current.end + 1);
10591
+ }
10592
+
10593
+ parser->current.end += eol_length;
10584
10594
  return delimiter;
10585
10595
  }
10586
10596
 
@@ -10690,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
10690
10700
  // We'll check if we're at the end of the file. If we are, then we
10691
10701
  // need to return the EOF token.
10692
10702
  if (parser->current.end >= parser->end) {
10703
+ // If we hit EOF, but the EOF came immediately after a newline,
10704
+ // set the start of the token to the newline. This way any EOF
10705
+ // errors will be reported as happening on that line rather than
10706
+ // a line after. For example "foo(\n" should report an error
10707
+ // on line 1 even though EOF technically occurs on line 2.
10708
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709
+ parser->current.start -= 1;
10710
+ }
10693
10711
  LEX(PM_TOKEN_EOF);
10694
10712
  }
10695
10713
 
@@ -10732,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
10732
10750
 
10733
10751
  lexed_comment = true;
10734
10752
  }
10735
- /* fallthrough */
10753
+ PRISM_FALLTHROUGH
10736
10754
  case '\r':
10737
10755
  case '\n': {
10738
10756
  parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10774,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
10774
10792
  parser->current.type = PM_TOKEN_NEWLINE;
10775
10793
  return;
10776
10794
  }
10777
- /* fallthrough */
10795
+ PRISM_FALLTHROUGH
10778
10796
  case PM_IGNORED_NEWLINE_ALL:
10779
10797
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10780
10798
  lexed_comment = false;
@@ -10871,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
10871
10889
 
10872
10890
  // ,
10873
10891
  case ',':
10892
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894
+ }
10895
+
10874
10896
  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10875
10897
  LEX(PM_TOKEN_COMMA);
10876
10898
 
@@ -11153,12 +11175,14 @@ parser_lex(pm_parser_t *parser) {
11153
11175
  lex_mode_push(parser, (pm_lex_mode_t) {
11154
11176
  .mode = PM_LEX_HEREDOC,
11155
11177
  .as.heredoc = {
11156
- .ident_start = ident_start,
11157
- .ident_length = ident_length,
11178
+ .base = {
11179
+ .ident_start = ident_start,
11180
+ .ident_length = ident_length,
11181
+ .quote = quote,
11182
+ .indent = indent
11183
+ },
11158
11184
  .next_start = parser->current.end,
11159
- .quote = quote,
11160
- .indent = indent,
11161
- .common_whitespace = (size_t) -1,
11185
+ .common_whitespace = NULL,
11162
11186
  .line_continuation = false
11163
11187
  }
11164
11188
  });
@@ -11171,7 +11195,7 @@ parser_lex(pm_parser_t *parser) {
11171
11195
  // this is not a valid heredoc declaration. In this case we
11172
11196
  // will add an error, but we will still return a heredoc
11173
11197
  // start.
11174
- if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
11198
+ if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11175
11199
  body_start = parser->end;
11176
11200
  } else {
11177
11201
  // Otherwise, we want to indicate that the body of the
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
11783
11807
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784
11808
  break;
11785
11809
  }
11786
- /* fallthrough */
11810
+ PRISM_FALLTHROUGH
11787
11811
  default:
11788
11812
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789
11813
  break;
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
11980
12004
  pm_token_buffer_push_byte(&token_buffer, '\r');
11981
12005
  break;
11982
12006
  }
11983
- /* fallthrough */
12007
+ PRISM_FALLTHROUGH
11984
12008
  case '\n':
11985
12009
  pm_token_buffer_push_byte(&token_buffer, '\n');
11986
12010
 
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
12084
12108
  pm_regexp_token_buffer_t token_buffer = { 0 };
12085
12109
 
12086
12110
  while (breakpoint != NULL) {
12111
+ uint8_t term = lex_mode->as.regexp.terminator;
12112
+ bool is_terminator = (*breakpoint == term);
12113
+
12114
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12115
+ // For example: `%\nfoo\r\n`
12116
+ // The string should be "foo", not "foo\r"
12117
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118
+ if (term == '\n') {
12119
+ is_terminator = true;
12120
+ }
12121
+
12122
+ // If the terminator is a CR, but we see a CRLF, we need to
12123
+ // treat the CRLF as a newline, meaning this is _not_ the
12124
+ // terminator
12125
+ if (term == '\r') {
12126
+ is_terminator = false;
12127
+ }
12128
+ }
12129
+
12087
12130
  // If we hit the terminator, we need to determine what kind of
12088
12131
  // token to return.
12089
- if (*breakpoint == lex_mode->as.regexp.terminator) {
12132
+ if (is_terminator) {
12090
12133
  if (lex_mode->as.regexp.nesting > 0) {
12091
12134
  parser->current.end = breakpoint + 1;
12092
12135
  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
12148
12191
  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149
12192
  token_buffer.base.cursor = breakpoint;
12150
12193
 
12151
- /* fallthrough */
12194
+ PRISM_FALLTHROUGH
12152
12195
  case '\n':
12153
12196
  // If we've hit a newline, then we need to track that in
12154
12197
  // the list of newlines.
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
12190
12233
  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191
12234
  break;
12192
12235
  }
12193
- /* fallthrough */
12236
+ PRISM_FALLTHROUGH
12194
12237
  case '\n':
12195
12238
  if (parser->heredoc_end) {
12196
12239
  // ... if we are on the same line as a heredoc,
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
12316
12359
  continue;
12317
12360
  }
12318
12361
 
12362
+ uint8_t term = lex_mode->as.string.terminator;
12363
+ bool is_terminator = (*breakpoint == term);
12364
+
12365
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12366
+ // For example: `%r\nfoo\r\n`
12367
+ // The string should be /foo/, not /foo\r/
12368
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369
+ if (term == '\n') {
12370
+ is_terminator = true;
12371
+ }
12372
+
12373
+ // If the terminator is a CR, but we see a CRLF, we need to
12374
+ // treat the CRLF as a newline, meaning this is _not_ the
12375
+ // terminator
12376
+ if (term == '\r') {
12377
+ is_terminator = false;
12378
+ }
12379
+ }
12380
+
12319
12381
  // Note that we have to check the terminator here first because we could
12320
12382
  // potentially be parsing a % string that has a # character as the
12321
12383
  // terminator.
12322
- if (*breakpoint == lex_mode->as.string.terminator) {
12384
+ if (is_terminator) {
12323
12385
  // If this terminator doesn't actually close the string, then we need
12324
12386
  // to continue on past it.
12325
12387
  if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
12379
12441
  pm_token_buffer_escape(parser, &token_buffer);
12380
12442
  token_buffer.cursor = breakpoint;
12381
12443
 
12382
- /* fallthrough */
12444
+ PRISM_FALLTHROUGH
12383
12445
  case '\n':
12384
12446
  // When we hit a newline, we need to flush any potential
12385
12447
  // heredocs. Note that this has to happen after we check
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
12424
12486
  pm_token_buffer_push_byte(&token_buffer, '\r');
12425
12487
  break;
12426
12488
  }
12427
- /* fallthrough */
12489
+ PRISM_FALLTHROUGH
12428
12490
  case '\n':
12429
12491
  if (!lex_mode->as.string.interpolation) {
12430
12492
  pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12514,6 +12576,7 @@ parser_lex(pm_parser_t *parser) {
12514
12576
  // Now let's grab the information about the identifier off of the
12515
12577
  // current lex mode.
12516
12578
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12579
+ pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12517
12580
 
12518
12581
  bool line_continuation = lex_mode->as.heredoc.line_continuation;
12519
12582
  lex_mode->as.heredoc.line_continuation = false;
@@ -12523,15 +12586,16 @@ parser_lex(pm_parser_t *parser) {
12523
12586
  // terminator) but still continue parsing so that content after the
12524
12587
  // declaration of the heredoc can be parsed.
12525
12588
  if (parser->current.end >= parser->end) {
12526
- pm_parser_err_heredoc_term(parser, lex_mode);
12589
+ pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12527
12590
  parser->next_start = lex_mode->as.heredoc.next_start;
12528
12591
  parser->heredoc_end = parser->current.end;
12529
12592
  lex_state_set(parser, PM_LEX_STATE_END);
12593
+ lex_mode_pop(parser);
12530
12594
  LEX(PM_TOKEN_HEREDOC_END);
12531
12595
  }
12532
12596
 
12533
- const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
12534
- size_t ident_length = lex_mode->as.heredoc.ident_length;
12597
+ const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12598
+ size_t ident_length = heredoc_lex_mode->ident_length;
12535
12599
 
12536
12600
  // If we are immediately following a newline and we have hit the
12537
12601
  // terminator, then we need to return the ending of the heredoc.
@@ -12556,10 +12620,7 @@ parser_lex(pm_parser_t *parser) {
12556
12620
  const uint8_t *terminator_start = ident_end - ident_length;
12557
12621
  const uint8_t *cursor = start;
12558
12622
 
12559
- if (
12560
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
12561
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
12562
- ) {
12623
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12563
12624
  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12564
12625
  cursor++;
12565
12626
  }
@@ -12582,17 +12643,19 @@ parser_lex(pm_parser_t *parser) {
12582
12643
  }
12583
12644
 
12584
12645
  lex_state_set(parser, PM_LEX_STATE_END);
12646
+ lex_mode_pop(parser);
12585
12647
  LEX(PM_TOKEN_HEREDOC_END);
12586
12648
  }
12587
12649
  }
12588
12650
 
12589
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
12651
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12590
12652
  if (
12591
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
12592
- (lex_mode->as.heredoc.common_whitespace > whitespace) &&
12653
+ heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12654
+ lex_mode->as.heredoc.common_whitespace != NULL &&
12655
+ (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12593
12656
  peek_at(parser, start) != '\n'
12594
12657
  ) {
12595
- lex_mode->as.heredoc.common_whitespace = whitespace;
12658
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
12596
12659
  }
12597
12660
  }
12598
12661
 
@@ -12601,7 +12664,7 @@ parser_lex(pm_parser_t *parser) {
12601
12664
  // strpbrk to find the first of these characters.
12602
12665
  uint8_t breakpoints[] = "\r\n\\#";
12603
12666
 
12604
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
12667
+ pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12605
12668
  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12606
12669
  breakpoints[3] = '\0';
12607
12670
  }
@@ -12631,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
12631
12694
  pm_token_buffer_escape(parser, &token_buffer);
12632
12695
  token_buffer.cursor = breakpoint;
12633
12696
 
12634
- /* fallthrough */
12697
+ PRISM_FALLTHROUGH
12635
12698
  case '\n': {
12636
12699
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12637
12700
  parser_flush_heredoc_end(parser);
@@ -12664,8 +12727,7 @@ parser_lex(pm_parser_t *parser) {
12664
12727
  // leading whitespace if we have a - or ~ heredoc.
12665
12728
  const uint8_t *cursor = start;
12666
12729
 
12667
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
12668
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12730
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12669
12731
  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12670
12732
  cursor++;
12671
12733
  }
@@ -12681,16 +12743,16 @@ parser_lex(pm_parser_t *parser) {
12681
12743
  }
12682
12744
  }
12683
12745
 
12684
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
12746
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12685
12747
 
12686
12748
  // If we have hit a newline that is followed by a valid
12687
12749
  // terminator, then we need to return the content of the
12688
12750
  // heredoc here as string content. Then, the next time a
12689
12751
  // token is lexed, it will match again and return the
12690
12752
  // end of the heredoc.
12691
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12692
- if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12693
- lex_mode->as.heredoc.common_whitespace = whitespace;
12753
+ if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12754
+ if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12755
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
12694
12756
  }
12695
12757
 
12696
12758
  parser->current.end = breakpoint + 1;
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
12732
12794
  pm_token_buffer_push_byte(&token_buffer, '\r');
12733
12795
  break;
12734
12796
  }
12735
- /* fallthrough */
12797
+ PRISM_FALLTHROUGH
12736
12798
  case '\n':
12737
12799
  pm_token_buffer_push_byte(&token_buffer, '\\');
12738
12800
  pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,12 +12814,12 @@ parser_lex(pm_parser_t *parser) {
12752
12814
  pm_token_buffer_push_byte(&token_buffer, '\r');
12753
12815
  break;
12754
12816
  }
12755
- /* fallthrough */
12817
+ PRISM_FALLTHROUGH
12756
12818
  case '\n':
12757
12819
  // If we are in a tilde here, we should
12758
12820
  // break out of the loop and return the
12759
12821
  // string content.
12760
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
12822
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12761
12823
  const uint8_t *end = parser->current.end;
12762
12824
  pm_newline_list_append(&parser->newline_list, end);
12763
12825
 
@@ -12983,7 +13045,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12983
13045
  [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12984
13046
  [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12985
13047
  [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12986
- [PM_TOKEN_USTAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13048
+ [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12987
13049
 
12988
13050
  // -@
12989
13051
  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13044
13106
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045
13107
  }
13046
13108
 
13047
- /**
13048
- * Returns true if the current token is any of the six given types.
13049
- */
13050
- static inline bool
13051
- match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053
- }
13054
-
13055
13109
  /**
13056
13110
  * Returns true if the current token is any of the seven given types.
13057
13111
  */
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13068
13122
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069
13123
  }
13070
13124
 
13125
+ /**
13126
+ * Returns true if the current token is any of the nine given types.
13127
+ */
13128
+ static inline bool
13129
+ match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131
+ }
13132
+
13071
13133
  /**
13072
13134
  * If the current token is of the specified type, lex forward by one token and
13073
13135
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13096
13158
  return false;
13097
13159
  }
13098
13160
 
13099
- /**
13100
- * If the current token is any of the three given types, lex forward by one
13101
- * token and return true. Otherwise return false.
13102
- */
13103
- static inline bool
13104
- accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105
- if (match3(parser, type1, type2, type3)) {
13106
- parser_lex(parser);
13107
- return true;
13108
- }
13109
- return false;
13110
- }
13111
-
13112
13161
  /**
13113
13162
  * This function indicates that the parser expects a token in a specific
13114
13163
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,32 +13195,16 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
13146
13195
  parser->previous.type = PM_TOKEN_MISSING;
13147
13196
  }
13148
13197
 
13149
- /**
13150
- * This function is the same as expect2, but it expects one of three token types.
13151
- */
13152
- static void
13153
- expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154
- if (accept3(parser, type1, type2, type3)) return;
13155
-
13156
- const uint8_t *location = parser->previous.end;
13157
- pm_parser_err(parser, location, location, diag_id);
13158
-
13159
- parser->previous.start = location;
13160
- parser->previous.type = PM_TOKEN_MISSING;
13161
- }
13162
-
13163
13198
  /**
13164
13199
  * A special expect1 that expects a heredoc terminator and handles popping the
13165
13200
  * lex mode accordingly.
13166
13201
  */
13167
13202
  static void
13168
- expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
13203
+ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13169
13204
  if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13170
- lex_mode_pop(parser);
13171
13205
  parser_lex(parser);
13172
13206
  } else {
13173
- pm_parser_err_heredoc_term(parser, lex_mode);
13174
- lex_mode_pop(parser);
13207
+ pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13175
13208
  parser->previous.start = parser->previous.end;
13176
13209
  parser->previous.type = PM_TOKEN_MISSING;
13177
13210
  }
@@ -13503,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
13503
13536
  return (pm_node_t *) pm_index_target_node_create(parser, call);
13504
13537
  }
13505
13538
  }
13506
- /* fallthrough */
13539
+ PRISM_FALLTHROUGH
13507
13540
  default:
13508
13541
  // In this case we have a node that we don't know how to convert
13509
13542
  // into a target. We need to treat it as an error. For now, we'll
@@ -13585,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13585
13618
  case PM_BACK_REFERENCE_READ_NODE:
13586
13619
  case PM_NUMBERED_REFERENCE_READ_NODE:
13587
13620
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13588
- /* fallthrough */
13621
+ PRISM_FALLTHROUGH
13589
13622
  case PM_GLOBAL_VARIABLE_READ_NODE: {
13590
13623
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13591
13624
  pm_node_destroy(parser, target);
@@ -13712,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13712
13745
 
13713
13746
  // Replace the name with "[]=".
13714
13747
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
+
13749
+ // Ensure that the arguments for []= don't contain keywords
13750
+ pm_index_arguments_check(parser, call->arguments, call->block);
13715
13751
  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13716
13752
 
13717
13753
  return target;
@@ -13724,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13724
13760
  // is no way for us to attach it to the tree at this point.
13725
13761
  pm_node_destroy(parser, value);
13726
13762
  }
13727
- /* fallthrough */
13763
+ PRISM_FALLTHROUGH
13728
13764
  default:
13729
13765
  // In this case we have a node that we don't know how to convert into a
13730
13766
  // target. We need to treat it as an error. For now, we'll mark it as an
@@ -13797,6 +13833,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13797
13833
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13798
13834
  pm_multi_target_node_targets_append(parser, result, splat);
13799
13835
  has_rest = true;
13836
+ } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13837
+ context_push(parser, PM_CONTEXT_MULTI_TARGET);
13838
+ pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13839
+ target = parse_target(parser, target, true, false);
13840
+
13841
+ pm_multi_target_node_targets_append(parser, result, target);
13842
+ context_pop(parser);
13800
13843
  } else if (token_begins_expression_p(parser->current.type)) {
13801
13844
  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13802
13845
  target = parse_target(parser, target, true, false);
@@ -14108,8 +14151,8 @@ static void
14108
14151
  parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14109
14152
  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14110
14153
 
14111
- // First we need to check if the next token is one that could be the start of
14112
- // an argument. If it's not, then we can just return.
14154
+ // First we need to check if the next token is one that could be the start
14155
+ // of an argument. If it's not, then we can just return.
14113
14156
  if (
14114
14157
  match2(parser, terminator, PM_TOKEN_EOF) ||
14115
14158
  (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
@@ -14186,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14186
14229
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14187
14230
  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14188
14231
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14232
+ if (parsed_bare_hash) {
14233
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14234
+ }
14189
14235
  } else {
14190
14236
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14191
14237
 
@@ -14234,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14234
14280
  }
14235
14281
  }
14236
14282
  }
14237
- /* fallthrough */
14283
+ PRISM_FALLTHROUGH
14238
14284
  default: {
14239
14285
  if (argument == NULL) {
14240
14286
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14297,23 +14343,32 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14297
14343
  // If parsing the argument failed, we need to stop parsing arguments.
14298
14344
  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14299
14345
 
14300
- // If the terminator of these arguments is not EOF, then we have a specific
14301
- // token we're looking for. In that case we can accept a newline here
14302
- // because it is not functioning as a statement terminator.
14303
- if (terminator != PM_TOKEN_EOF) accept1(parser, PM_TOKEN_NEWLINE);
14346
+ // If the terminator of these arguments is not EOF, then we have a
14347
+ // specific token we're looking for. In that case we can accept a
14348
+ // newline here because it is not functioning as a statement terminator.
14349
+ bool accepted_newline = false;
14350
+ if (terminator != PM_TOKEN_EOF) {
14351
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14352
+ }
14304
14353
 
14305
14354
  if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14306
- // If we previously were on a comma and we just parsed a bare hash, then
14307
- // we want to continue parsing arguments. This is because the comma was
14308
- // grabbed up by the hash parser.
14355
+ // If we previously were on a comma and we just parsed a bare hash,
14356
+ // then we want to continue parsing arguments. This is because the
14357
+ // comma was grabbed up by the hash parser.
14358
+ } else if (accept1(parser, PM_TOKEN_COMMA)) {
14359
+ // If there was a comma, then we need to check if we also accepted a
14360
+ // newline. If we did, then this is a syntax error.
14361
+ if (accepted_newline) {
14362
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14363
+ }
14309
14364
  } else {
14310
- // If there is no comma at the end of the argument list then we're done
14311
- // parsing arguments and can break out of this loop.
14312
- if (!accept1(parser, PM_TOKEN_COMMA)) break;
14365
+ // If there is no comma at the end of the argument list then we're
14366
+ // done parsing arguments and can break out of this loop.
14367
+ break;
14313
14368
  }
14314
14369
 
14315
- // If we hit the terminator, then that means we have a trailing comma so we
14316
- // can accept that output as well.
14370
+ // If we hit the terminator, then that means we have a trailing comma so
14371
+ // we can accept that output as well.
14317
14372
  if (match1(parser, terminator)) break;
14318
14373
  }
14319
14374
  }
@@ -14468,15 +14523,17 @@ parse_parameters(
14468
14523
  bool allows_trailing_comma,
14469
14524
  bool allows_forwarding_parameters,
14470
14525
  bool accepts_blocks_in_defaults,
14526
+ bool in_block,
14471
14527
  uint16_t depth
14472
14528
  ) {
14473
- pm_parameters_node_t *params = pm_parameters_node_create(parser);
14474
- bool looping = true;
14475
-
14476
14529
  pm_do_loop_stack_push(parser, false);
14530
+
14531
+ pm_parameters_node_t *params = pm_parameters_node_create(parser);
14477
14532
  pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14478
14533
 
14479
- do {
14534
+ while (true) {
14535
+ bool parsing = true;
14536
+
14480
14537
  switch (parser->current.type) {
14481
14538
  case PM_TOKEN_PARENTHESIS_LEFT: {
14482
14539
  update_parameter_state(parser, &parser->current, &order);
@@ -14611,7 +14668,7 @@ parse_parameters(
14611
14668
  // then we can put a missing node in its place and stop parsing the
14612
14669
  // parameters entirely now.
14613
14670
  if (parser->recovering) {
14614
- looping = false;
14671
+ parsing = false;
14615
14672
  break;
14616
14673
  }
14617
14674
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
@@ -14631,7 +14688,7 @@ parse_parameters(
14631
14688
  break;
14632
14689
  }
14633
14690
  case PM_TOKEN_LABEL: {
14634
- if (!uses_parentheses) parser->in_keyword_arg = true;
14691
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14635
14692
  update_parameter_state(parser, &parser->current, &order);
14636
14693
 
14637
14694
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -14669,7 +14726,7 @@ parse_parameters(
14669
14726
  context_pop(parser);
14670
14727
 
14671
14728
  if (uses_parentheses) {
14672
- looping = false;
14729
+ parsing = false;
14673
14730
  break;
14674
14731
  }
14675
14732
 
@@ -14713,7 +14770,7 @@ parse_parameters(
14713
14770
  // then we can put a missing node in its place and stop parsing the
14714
14771
  // parameters entirely now.
14715
14772
  if (parser->recovering) {
14716
- looping = false;
14773
+ parsing = false;
14717
14774
  break;
14718
14775
  }
14719
14776
  }
@@ -14815,14 +14872,31 @@ parse_parameters(
14815
14872
  }
14816
14873
  }
14817
14874
 
14818
- looping = false;
14875
+ parsing = false;
14819
14876
  break;
14820
14877
  }
14821
14878
 
14822
- if (looping && uses_parentheses) {
14823
- accept1(parser, PM_TOKEN_NEWLINE);
14879
+ // If we hit some kind of issue while parsing the parameter, this would
14880
+ // have been set to false. In that case, we need to break out of the
14881
+ // loop.
14882
+ if (!parsing) break;
14883
+
14884
+ bool accepted_newline = false;
14885
+ if (uses_parentheses) {
14886
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14824
14887
  }
14825
- } while (looping && accept1(parser, PM_TOKEN_COMMA));
14888
+
14889
+ if (accept1(parser, PM_TOKEN_COMMA)) {
14890
+ // If there was a comma, but we also accepted a newline, then this
14891
+ // is a syntax error.
14892
+ if (accepted_newline) {
14893
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14894
+ }
14895
+ } else {
14896
+ // If there was no comma, then we're done parsing parameters.
14897
+ break;
14898
+ }
14899
+ }
14826
14900
 
14827
14901
  pm_do_loop_stack_pop(parser);
14828
14902
 
@@ -15083,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15083
15157
  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15084
15158
  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15085
15159
  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15086
- default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15160
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15087
15161
  }
15088
15162
 
15089
15163
  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15178,6 +15252,7 @@ parse_block_parameters(
15178
15252
  allows_trailing_comma,
15179
15253
  false,
15180
15254
  accepts_blocks_in_defaults,
15255
+ true,
15181
15256
  (uint16_t) (depth + 1)
15182
15257
  );
15183
15258
  }
@@ -15500,6 +15575,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
15500
15575
  case PM_CONTEXT_IF:
15501
15576
  case PM_CONTEXT_LOOP_PREDICATE:
15502
15577
  case PM_CONTEXT_MAIN:
15578
+ case PM_CONTEXT_MULTI_TARGET:
15503
15579
  case PM_CONTEXT_PARENS:
15504
15580
  case PM_CONTEXT_POSTEXE:
15505
15581
  case PM_CONTEXT_PREDICATE:
@@ -15628,6 +15704,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15628
15704
  case PM_CONTEXT_MODULE_ENSURE:
15629
15705
  case PM_CONTEXT_MODULE_RESCUE:
15630
15706
  case PM_CONTEXT_MODULE:
15707
+ case PM_CONTEXT_MULTI_TARGET:
15631
15708
  case PM_CONTEXT_PARENS:
15632
15709
  case PM_CONTEXT_PREDICATE:
15633
15710
  case PM_CONTEXT_RESCUE_MODIFIER:
@@ -16091,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
16091
16168
  case PM_TOKEN_TILDE:
16092
16169
  case PM_TOKEN_BANG:
16093
16170
  if (name->end[-1] == '@') return name->end - 1;
16094
- /* fallthrough */
16171
+ PRISM_FALLTHROUGH
16095
16172
  default:
16096
16173
  return name->end;
16097
16174
  }
@@ -16347,14 +16424,15 @@ static pm_node_t *
16347
16424
  parse_variable(pm_parser_t *parser) {
16348
16425
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16349
16426
  int depth;
16427
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16350
16428
 
16351
- if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16429
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16352
16430
  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16353
16431
  }
16354
16432
 
16355
16433
  pm_scope_t *current_scope = parser->current_scope;
16356
16434
  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16357
- if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16435
+ if (is_numbered_param) {
16358
16436
  // When you use a numbered parameter, it implies the existence of
16359
16437
  // all of the locals that exist before it. For example, referencing
16360
16438
  // _2 means that _1 must exist. Therefore here we loop through all
@@ -17045,7 +17123,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17045
17123
  parse_pattern_hash_key(parser, &keys, first_node);
17046
17124
  pm_node_t *value;
17047
17125
 
17048
- if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17126
+ if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17049
17127
  // Otherwise, we will create an implicit local variable
17050
17128
  // target for the value.
17051
17129
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
@@ -17062,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17062
17140
  break;
17063
17141
  }
17064
17142
  }
17065
- /* fallthrough */
17143
+ PRISM_FALLTHROUGH
17066
17144
  default: {
17067
17145
  // If we get anything else, then this is an error. For this we'll
17068
17146
  // create a missing node for the value and create an assoc node for
@@ -17082,7 +17160,12 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17082
17160
  // If there are any other assocs, then we'll parse them now.
17083
17161
  while (accept1(parser, PM_TOKEN_COMMA)) {
17084
17162
  // Here we need to break to support trailing commas.
17085
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17163
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17164
+ // Trailing commas are not allowed to follow a rest pattern.
17165
+ if (rest != NULL) {
17166
+ pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17167
+ }
17168
+
17086
17169
  break;
17087
17170
  }
17088
17171
 
@@ -17553,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17553
17636
  break;
17554
17637
  }
17555
17638
  }
17556
- /* fallthrough */
17639
+ PRISM_FALLTHROUGH
17557
17640
  default:
17558
17641
  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17559
17642
  break;
@@ -17575,9 +17658,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17575
17658
  // Gather up all of the patterns into the list.
17576
17659
  while (accept1(parser, PM_TOKEN_COMMA)) {
17577
17660
  // Break early here in case we have a trailing comma.
17578
- if (match4(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON)) {
17661
+ if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17579
17662
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17580
17663
  pm_node_list_append(&nodes, node);
17664
+ trailing_rest = true;
17581
17665
  break;
17582
17666
  }
17583
17667
 
@@ -17779,6 +17863,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17779
17863
  case PM_CONTEXT_LAMBDA_BRACES:
17780
17864
  case PM_CONTEXT_LAMBDA_DO_END:
17781
17865
  case PM_CONTEXT_LOOP_PREDICATE:
17866
+ case PM_CONTEXT_MULTI_TARGET:
17782
17867
  case PM_CONTEXT_PARENS:
17783
17868
  case PM_CONTEXT_POSTEXE:
17784
17869
  case PM_CONTEXT_PREDICATE:
@@ -17862,6 +17947,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17862
17947
  case PM_CONTEXT_LAMBDA_ENSURE:
17863
17948
  case PM_CONTEXT_LAMBDA_RESCUE:
17864
17949
  case PM_CONTEXT_LOOP_PREDICATE:
17950
+ case PM_CONTEXT_MULTI_TARGET:
17865
17951
  case PM_CONTEXT_PARENS:
17866
17952
  case PM_CONTEXT_POSTEXE:
17867
17953
  case PM_CONTEXT_PREDICATE:
@@ -17951,19 +18037,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17951
18037
  bool parsed_bare_hash = false;
17952
18038
 
17953
18039
  while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18040
+ bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18041
+
17954
18042
  // Handle the case where we don't have a comma and we have a
17955
18043
  // newline followed by a right bracket.
17956
- if (accept1(parser, PM_TOKEN_NEWLINE) && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18044
+ if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17957
18045
  break;
17958
18046
  }
17959
18047
 
17960
18048
  // Ensure that we have a comma between elements in the array.
17961
- if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
17962
- const uint8_t *location = parser->previous.end;
17963
- PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18049
+ if (array->elements.size > 0) {
18050
+ if (accept1(parser, PM_TOKEN_COMMA)) {
18051
+ // If there was a comma but we also accepted a newline,
18052
+ // then this is a syntax error.
18053
+ if (accepted_newline) {
18054
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18055
+ }
18056
+ } else {
18057
+ // If there was no comma, then we need to add a syntax
18058
+ // error.
18059
+ const uint8_t *location = parser->previous.end;
18060
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17964
18061
 
17965
- parser->previous.start = location;
17966
- parser->previous.type = PM_TOKEN_MISSING;
18062
+ parser->previous.start = location;
18063
+ parser->previous.type = PM_TOKEN_MISSING;
18064
+ }
17967
18065
  }
17968
18066
 
17969
18067
  // If we have a right bracket immediately following a comma,
@@ -18119,14 +18217,32 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18119
18217
  multi_target->base.location.start = lparen_loc.start;
18120
18218
  multi_target->base.location.end = rparen_loc.end;
18121
18219
 
18122
- if (match1(parser, PM_TOKEN_COMMA)) {
18123
- if (binding_power == PM_BINDING_POWER_STATEMENT) {
18124
- return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18125
- }
18126
- return (pm_node_t *) multi_target;
18220
+ pm_node_t *result;
18221
+ if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18222
+ result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18223
+ accept1(parser, PM_TOKEN_NEWLINE);
18224
+ } else {
18225
+ result = (pm_node_t *) multi_target;
18127
18226
  }
18128
18227
 
18129
- return parse_target_validate(parser, (pm_node_t *) multi_target, false);
18228
+ if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18229
+ // All set, this is explicitly allowed by the parent
18230
+ // context.
18231
+ } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18232
+ // All set, we're inside a for loop and we're parsing
18233
+ // multiple targets.
18234
+ } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18235
+ // Multi targets are not allowed when we are not at the
18236
+ // statement level.
18237
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18238
+ } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18239
+ // Multi targets must be followed by an equal sign in
18240
+ // order to be valid (or a right parenthesis if they are
18241
+ // nested).
18242
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18243
+ }
18244
+
18245
+ return result;
18130
18246
  }
18131
18247
 
18132
18248
  // If we have a single statement and are ending on a right parenthesis
@@ -18187,6 +18303,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18187
18303
  pm_accepts_block_stack_pop(parser);
18188
18304
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18189
18305
 
18306
+ // When we're parsing multi targets, we allow them to be followed by
18307
+ // a right parenthesis if they are at the statement level. This is
18308
+ // only possible if they are the final statement inside parentheses.
18309
+ // We need to explicitly reject that here.
18310
+ {
18311
+ pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18312
+
18313
+ if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18314
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18315
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
18316
+
18317
+ statement = (pm_node_t *) multi_target;
18318
+ statements->body.nodes[statements->body.size - 1] = statement;
18319
+ }
18320
+
18321
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18322
+ const uint8_t *offset = statement->location.end;
18323
+ pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18324
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18325
+
18326
+ statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18327
+ statements->body.nodes[statements->body.size - 1] = statement;
18328
+
18329
+ pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18330
+ }
18331
+ }
18332
+
18190
18333
  pop_block_exits(parser, previous_block_exits);
18191
18334
  pm_node_list_free(&current_block_exits);
18192
18335
 
@@ -18442,10 +18585,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18442
18585
  case PM_TOKEN_HEREDOC_START: {
18443
18586
  // Here we have found a heredoc. We'll parse it and add it to the
18444
18587
  // list of strings.
18445
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
18446
- assert(lex_mode->mode == PM_LEX_HEREDOC);
18447
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
18448
- pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
18588
+ assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18589
+ pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18590
+
18591
+ size_t common_whitespace = (size_t) -1;
18592
+ parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18449
18593
 
18450
18594
  parser_lex(parser);
18451
18595
  pm_token_t opening = parser->previous;
@@ -18456,10 +18600,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18456
18600
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18457
18601
  // If we get here, then we have an empty heredoc. We'll create
18458
18602
  // an empty content token and return an empty string node.
18459
- expect1_heredoc_term(parser, lex_mode);
18603
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18460
18604
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
18461
18605
 
18462
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18606
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18463
18607
  node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18464
18608
  } else {
18465
18609
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
@@ -18486,18 +18630,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18486
18630
  cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18487
18631
  cast->base.location = cast->opening_loc;
18488
18632
 
18489
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18633
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18490
18634
  assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18491
18635
  cast->base.type = PM_X_STRING_NODE;
18492
18636
  }
18493
18637
 
18494
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
18495
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18638
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18496
18639
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18497
18640
  }
18498
18641
 
18499
18642
  node = (pm_node_t *) cast;
18500
- expect1_heredoc_term(parser, lex_mode);
18643
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18501
18644
  } else {
18502
18645
  // If we get here, then we have multiple parts in the heredoc,
18503
18646
  // so we'll need to create an interpolated string node to hold
@@ -18511,15 +18654,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18511
18654
  }
18512
18655
  }
18513
18656
 
18514
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
18515
-
18516
18657
  // Now that we have all of the parts, create the correct type of
18517
18658
  // interpolated node.
18518
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18659
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18519
18660
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18520
18661
  cast->parts = parts;
18521
18662
 
18522
- expect1_heredoc_term(parser, lex_mode);
18663
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18523
18664
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18524
18665
 
18525
18666
  cast->base.location = cast->opening_loc;
@@ -18528,7 +18669,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18528
18669
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18529
18670
  pm_node_list_free(&parts);
18530
18671
 
18531
- expect1_heredoc_term(parser, lex_mode);
18672
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18532
18673
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
18533
18674
 
18534
18675
  cast->base.location = cast->opening_loc;
@@ -18537,9 +18678,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18537
18678
 
18538
18679
  // If this is a heredoc that is indented with a ~, then we need
18539
18680
  // to dedent each line by the common leading whitespace.
18540
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18681
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18541
18682
  pm_node_list_t *nodes;
18542
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
18683
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18543
18684
  nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18544
18685
  } else {
18545
18686
  nodes = &((pm_interpolated_string_node_t *) node)->parts;
@@ -18625,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18625
18766
  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18626
18767
  }
18627
18768
  }
18628
- /* fallthrough */
18769
+ PRISM_FALLTHROUGH
18629
18770
  default:
18630
18771
  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18631
18772
  }
@@ -19116,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19116
19257
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19117
19258
  parser_lex(parser);
19118
19259
 
19260
+ // This will be false if the method name is not a valid identifier
19261
+ // but could be followed by an operator.
19262
+ bool valid_name = true;
19263
+
19119
19264
  switch (parser->current.type) {
19120
19265
  case PM_CASE_OPERATOR:
19121
19266
  pm_parser_scope_push(parser, true);
@@ -19145,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19145
19290
 
19146
19291
  break;
19147
19292
  }
19148
- case PM_TOKEN_CONSTANT:
19149
19293
  case PM_TOKEN_INSTANCE_VARIABLE:
19150
19294
  case PM_TOKEN_CLASS_VARIABLE:
19151
19295
  case PM_TOKEN_GLOBAL_VARIABLE:
19296
+ valid_name = false;
19297
+ PRISM_FALLTHROUGH
19298
+ case PM_TOKEN_CONSTANT:
19152
19299
  case PM_TOKEN_KEYWORD_NIL:
19153
19300
  case PM_TOKEN_KEYWORD_SELF:
19154
19301
  case PM_TOKEN_KEYWORD_TRUE:
@@ -19206,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19206
19353
 
19207
19354
  name = parse_method_definition_name(parser);
19208
19355
  } else {
19356
+ if (!valid_name) {
19357
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19358
+ }
19359
+
19209
19360
  name = identifier;
19210
19361
  }
19211
19362
  break;
@@ -19256,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19256
19407
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19257
19408
  params = NULL;
19258
19409
  } else {
19259
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19410
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19260
19411
  }
19261
19412
 
19262
19413
  lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19281,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19281
19432
 
19282
19433
  lparen = not_provided(parser);
19283
19434
  rparen = not_provided(parser);
19284
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19435
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19285
19436
 
19286
19437
  context_pop(parser);
19287
19438
  break;
@@ -19690,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19690
19841
  pm_do_loop_stack_pop(parser);
19691
19842
  context_pop(parser);
19692
19843
 
19693
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19694
- pm_statements_node_t *statements = NULL;
19844
+ pm_token_t do_keyword;
19845
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19846
+ do_keyword = parser->previous;
19847
+ } else {
19848
+ do_keyword = not_provided(parser);
19849
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19850
+ }
19695
19851
 
19852
+ pm_statements_node_t *statements = NULL;
19696
19853
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19697
19854
  pm_accepts_block_stack_push(parser, true);
19698
19855
  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19703,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19703
19860
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19704
19861
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19705
19862
 
19706
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19863
+ return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19707
19864
  }
19708
19865
  case PM_TOKEN_KEYWORD_WHILE: {
19709
19866
  size_t opening_newline_index = token_newline_index(parser);
@@ -19718,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19718
19875
  pm_do_loop_stack_pop(parser);
19719
19876
  context_pop(parser);
19720
19877
 
19721
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19722
- pm_statements_node_t *statements = NULL;
19878
+ pm_token_t do_keyword;
19879
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19880
+ do_keyword = parser->previous;
19881
+ } else {
19882
+ do_keyword = not_provided(parser);
19883
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19884
+ }
19723
19885
 
19886
+ pm_statements_node_t *statements = NULL;
19724
19887
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19725
19888
  pm_accepts_block_stack_push(parser, true);
19726
19889
  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19731,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19731
19894
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19732
19895
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19733
19896
 
19734
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19897
+ return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19735
19898
  }
19736
19899
  case PM_TOKEN_PERCENT_LOWER_I: {
19737
19900
  parser_lex(parser);
@@ -20801,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20801
20964
  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20802
20965
  }
20803
20966
  }
20804
- /* fallthrough */
20967
+ PRISM_FALLTHROUGH
20805
20968
  case PM_CASE_WRITABLE: {
20806
20969
  parser_lex(parser);
20807
20970
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20847,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20847
21010
  case PM_BACK_REFERENCE_READ_NODE:
20848
21011
  case PM_NUMBERED_REFERENCE_READ_NODE:
20849
21012
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20850
- /* fallthrough */
21013
+ PRISM_FALLTHROUGH
20851
21014
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20852
21015
  parser_lex(parser);
20853
21016
 
@@ -20965,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20965
21128
  case PM_BACK_REFERENCE_READ_NODE:
20966
21129
  case PM_NUMBERED_REFERENCE_READ_NODE:
20967
21130
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20968
- /* fallthrough */
21131
+ PRISM_FALLTHROUGH
20969
21132
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20970
21133
  parser_lex(parser);
20971
21134
 
@@ -21093,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21093
21256
  case PM_BACK_REFERENCE_READ_NODE:
21094
21257
  case PM_NUMBERED_REFERENCE_READ_NODE:
21095
21258
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21096
- /* fallthrough */
21259
+ PRISM_FALLTHROUGH
21097
21260
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21098
21261
  parser_lex(parser);
21099
21262
 
@@ -21303,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21303
21466
  case PM_TOKEN_STAR:
21304
21467
  case PM_TOKEN_STAR_STAR: {
21305
21468
  parser_lex(parser);
21469
+ pm_token_t operator = parser->previous;
21470
+ switch (PM_NODE_TYPE(node)) {
21471
+ case PM_RESCUE_MODIFIER_NODE: {
21472
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21473
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21474
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21475
+ }
21476
+ break;
21477
+ }
21478
+ case PM_AND_NODE: {
21479
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21480
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21481
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21482
+ }
21483
+ break;
21484
+ }
21485
+ case PM_OR_NODE: {
21486
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21487
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21488
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21489
+ }
21490
+ break;
21491
+ }
21492
+ default:
21493
+ break;
21494
+ }
21495
+
21306
21496
  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21307
21497
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21308
21498
  }
@@ -21330,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21330
21520
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21331
21521
  }
21332
21522
 
21523
+ switch (PM_NODE_TYPE(node)) {
21524
+ case PM_RESCUE_MODIFIER_NODE: {
21525
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21526
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21527
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21528
+ }
21529
+ break;
21530
+ }
21531
+ case PM_AND_NODE: {
21532
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21533
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21534
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21535
+ }
21536
+ break;
21537
+ }
21538
+ case PM_OR_NODE: {
21539
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21540
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21541
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21542
+ }
21543
+ break;
21544
+ }
21545
+ default:
21546
+ break;
21547
+ }
21548
+
21333
21549
  pm_token_t message;
21334
21550
 
21335
21551
  switch (parser->current.type) {
@@ -21677,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21677
21893
  if (pm_symbol_node_label_p(node)) {
21678
21894
  return node;
21679
21895
  }
21896
+ break;
21680
21897
  default:
21681
21898
  break;
21682
21899
  }
@@ -21684,8 +21901,11 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21684
21901
  // Otherwise we'll look and see if the next token can be parsed as an infix
21685
21902
  // operator. If it can, then we'll parse it using parse_expression_infix.
21686
21903
  pm_binding_powers_t current_binding_powers;
21904
+ pm_token_type_t current_token_type;
21905
+
21687
21906
  while (
21688
- current_binding_powers = pm_binding_powers[parser->current.type],
21907
+ current_token_type = parser->current.type,
21908
+ current_binding_powers = pm_binding_powers[current_token_type],
21689
21909
  binding_power <= current_binding_powers.left &&
21690
21910
  current_binding_powers.binary
21691
21911
  ) {
@@ -21726,6 +21946,13 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21726
21946
  // If the operator is nonassoc and we should not be able to parse the
21727
21947
  // upcoming infix operator, break.
21728
21948
  if (current_binding_powers.nonassoc) {
21949
+ // If this is a non-assoc operator and we are about to parse the
21950
+ // exact same operator, then we need to add an error.
21951
+ if (match1(parser, current_token_type)) {
21952
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21953
+ break;
21954
+ }
21955
+
21729
21956
  // If this is an endless range, then we need to reject a couple of
21730
21957
  // additional operators because it violates the normal operator
21731
21958
  // precedence rules. Those patterns are:
@@ -21735,7 +21962,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21735
21962
  //
21736
21963
  if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21737
21964
  if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21738
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(parser->previous.type));
21965
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21739
21966
  break;
21740
21967
  }
21741
21968
 
@@ -21857,6 +22084,7 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21857
22084
  ));
21858
22085
 
21859
22086
  pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22087
+ pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21860
22088
  }
21861
22089
 
21862
22090
  pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
@@ -22535,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
22535
22763
  }
22536
22764
 
22537
22765
  #endif
22766
+
22767
+ /******************************************************************************/
22768
+ /* Slice queries for the Ruby API */
22769
+ /******************************************************************************/
22770
+
22771
/**
 * The classification that pm_slice_type assigns to a slice of source.
 */
typedef enum {
    /** The encoding name could not be resolved to an encoding. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not fall into any of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22788
+
22789
+ /**
22790
+ * Check that the slice is a valid local variable name or constant.
22791
+ */
22792
+ pm_slice_type_t
22793
+ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22794
+ // first, get the right encoding object
22795
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22796
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22797
+
22798
+ // check that there is at least one character
22799
+ if (length == 0) return PM_SLICE_TYPE_NONE;
22800
+
22801
+ size_t width;
22802
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22803
+ // valid because alphabetical
22804
+ } else if (*source == '_') {
22805
+ // valid because underscore
22806
+ width = 1;
22807
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22808
+ // valid because multibyte
22809
+ } else {
22810
+ // invalid because no match
22811
+ return PM_SLICE_TYPE_NONE;
22812
+ }
22813
+
22814
+ // determine the type of the slice based on the first character
22815
+ const uint8_t *end = source + length;
22816
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22817
+
22818
+ // next, iterate through all of the bytes of the string to ensure that they
22819
+ // are all valid identifier characters
22820
+ source += width;
22821
+
22822
+ while (source < end) {
22823
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
22824
+ // valid because alphanumeric
22825
+ source += width;
22826
+ } else if (*source == '_') {
22827
+ // valid because underscore
22828
+ source++;
22829
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22830
+ // valid because multibyte
22831
+ source += width;
22832
+ } else {
22833
+ // invalid because no match
22834
+ break;
22835
+ }
22836
+ }
22837
+
22838
+ // accept a ! or ? at the end of the slice as a method name
22839
+ if (*source == '!' || *source == '?' || *source == '=') {
22840
+ source++;
22841
+ result = PM_SLICE_TYPE_METHOD_NAME;
22842
+ }
22843
+
22844
+ // valid if we are at the end of the slice
22845
+ return source == end ? result : PM_SLICE_TYPE_NONE;
22846
+ }
22847
+
22848
+ /**
22849
+ * Check that the slice is a valid local variable name.
22850
+ */
22851
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22852
+ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22853
+ switch (pm_slice_type(source, length, encoding_name)) {
22854
+ case PM_SLICE_TYPE_ERROR:
22855
+ return PM_STRING_QUERY_ERROR;
22856
+ case PM_SLICE_TYPE_NONE:
22857
+ case PM_SLICE_TYPE_CONSTANT:
22858
+ case PM_SLICE_TYPE_METHOD_NAME:
22859
+ return PM_STRING_QUERY_FALSE;
22860
+ case PM_SLICE_TYPE_LOCAL:
22861
+ return PM_STRING_QUERY_TRUE;
22862
+ }
22863
+
22864
+ assert(false && "unreachable");
22865
+ return PM_STRING_QUERY_FALSE;
22866
+ }
22867
+
22868
+ /**
22869
+ * Check that the slice is a valid constant name.
22870
+ */
22871
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22872
+ pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22873
+ switch (pm_slice_type(source, length, encoding_name)) {
22874
+ case PM_SLICE_TYPE_ERROR:
22875
+ return PM_STRING_QUERY_ERROR;
22876
+ case PM_SLICE_TYPE_NONE:
22877
+ case PM_SLICE_TYPE_LOCAL:
22878
+ case PM_SLICE_TYPE_METHOD_NAME:
22879
+ return PM_STRING_QUERY_FALSE;
22880
+ case PM_SLICE_TYPE_CONSTANT:
22881
+ return PM_STRING_QUERY_TRUE;
22882
+ }
22883
+
22884
+ assert(false && "unreachable");
22885
+ return PM_STRING_QUERY_FALSE;
22886
+ }
22887
+
22888
+ /**
22889
+ * Check that the slice is a valid method name.
22890
+ */
22891
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22892
+ pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22893
+ #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22894
+ #define C1(c) (*source == c)
22895
+ #define C2(s) (memcmp(source, s, 2) == 0)
22896
+ #define C3(s) (memcmp(source, s, 3) == 0)
22897
+
22898
+ switch (pm_slice_type(source, length, encoding_name)) {
22899
+ case PM_SLICE_TYPE_ERROR:
22900
+ return PM_STRING_QUERY_ERROR;
22901
+ case PM_SLICE_TYPE_NONE:
22902
+ break;
22903
+ case PM_SLICE_TYPE_LOCAL:
22904
+ // numbered parameters are not valid method names
22905
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22906
+ case PM_SLICE_TYPE_CONSTANT:
22907
+ // all constants are valid method names
22908
+ case PM_SLICE_TYPE_METHOD_NAME:
22909
+ // all method names are valid method names
22910
+ return PM_STRING_QUERY_TRUE;
22911
+ }
22912
+
22913
+ switch (length) {
22914
+ case 1:
22915
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22916
+ case 2:
22917
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22918
+ case 3:
22919
+ return B(C3("===") || C3("<=>") || C3("[]="));
22920
+ default:
22921
+ return PM_STRING_QUERY_FALSE;
22922
+ }
22923
+
22924
+ #undef B
22925
+ #undef C1
22926
+ #undef C2
22927
+ #undef C3
22928
+ }