prism 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +51 -1
  3. data/Makefile +4 -2
  4. data/README.md +2 -0
  5. data/config.yml +266 -38
  6. data/docs/design.md +2 -2
  7. data/docs/parser_translation.md +8 -23
  8. data/docs/releasing.md +5 -24
  9. data/docs/ripper_translation.md +1 -1
  10. data/ext/prism/api_node.c +2 -0
  11. data/ext/prism/extension.c +25 -3
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +306 -50
  14. data/include/prism/diagnostic.h +5 -0
  15. data/include/prism/options.h +43 -3
  16. data/include/prism/regexp.h +2 -2
  17. data/include/prism/util/pm_buffer.h +8 -0
  18. data/include/prism/util/pm_integer.h +4 -0
  19. data/include/prism/util/pm_list.h +6 -0
  20. data/include/prism/util/pm_string.h +12 -2
  21. data/include/prism/version.h +2 -2
  22. data/include/prism.h +39 -14
  23. data/lib/prism/compiler.rb +456 -151
  24. data/lib/prism/desugar_compiler.rb +1 -0
  25. data/lib/prism/dispatcher.rb +16 -0
  26. data/lib/prism/dot_visitor.rb +5 -1
  27. data/lib/prism/dsl.rb +3 -0
  28. data/lib/prism/ffi.rb +25 -9
  29. data/lib/prism/inspect_visitor.rb +3 -0
  30. data/lib/prism/lex_compat.rb +1 -0
  31. data/lib/prism/mutation_compiler.rb +3 -0
  32. data/lib/prism/node.rb +507 -336
  33. data/lib/prism/node_ext.rb +4 -1
  34. data/lib/prism/pack.rb +2 -0
  35. data/lib/prism/parse_result/comments.rb +1 -0
  36. data/lib/prism/parse_result/errors.rb +1 -0
  37. data/lib/prism/parse_result/newlines.rb +1 -0
  38. data/lib/prism/parse_result.rb +1 -0
  39. data/lib/prism/pattern.rb +1 -0
  40. data/lib/prism/polyfill/scan_byte.rb +14 -0
  41. data/lib/prism/polyfill/warn.rb +36 -0
  42. data/lib/prism/reflection.rb +3 -0
  43. data/lib/prism/relocation.rb +1 -0
  44. data/lib/prism/serialize.rb +25 -19
  45. data/lib/prism/string_query.rb +1 -0
  46. data/lib/prism/translation/parser/builder.rb +1 -0
  47. data/lib/prism/translation/parser/compiler.rb +47 -25
  48. data/lib/prism/translation/parser/lexer.rb +29 -21
  49. data/lib/prism/translation/parser.rb +21 -2
  50. data/lib/prism/translation/parser33.rb +1 -0
  51. data/lib/prism/translation/parser34.rb +1 -0
  52. data/lib/prism/translation/parser35.rb +1 -0
  53. data/lib/prism/translation/parser_current.rb +24 -0
  54. data/lib/prism/translation/ripper/sexp.rb +1 -0
  55. data/lib/prism/translation/ripper.rb +17 -1
  56. data/lib/prism/translation/ruby_parser.rb +287 -4
  57. data/lib/prism/translation.rb +2 -0
  58. data/lib/prism/visitor.rb +457 -152
  59. data/lib/prism.rb +23 -0
  60. data/prism.gemspec +5 -1
  61. data/rbi/prism/dsl.rbi +3 -3
  62. data/rbi/prism/node.rbi +21 -9
  63. data/sig/prism/dispatcher.rbs +3 -0
  64. data/sig/prism/dsl.rbs +3 -3
  65. data/sig/prism/node.rbs +444 -30
  66. data/sig/prism/node_ext.rbs +84 -17
  67. data/sig/prism/parse_result/comments.rbs +38 -0
  68. data/sig/prism/parse_result.rbs +4 -0
  69. data/sig/prism/reflection.rbs +1 -1
  70. data/sig/prism.rbs +4 -0
  71. data/src/diagnostic.c +9 -1
  72. data/src/node.c +2 -0
  73. data/src/options.c +2 -2
  74. data/src/prettyprint.c +2 -0
  75. data/src/prism.c +324 -147
  76. data/src/serialize.c +2 -0
  77. data/src/token_type.c +36 -34
  78. data/src/util/pm_string.c +6 -8
  79. metadata +7 -3
data/src/prism.c CHANGED
@@ -1409,7 +1409,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1409
1409
  static inline void
1410
1410
  pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411
1411
  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1412
+ pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413
1413
  }
1414
1414
  }
1415
1415
 
@@ -2622,10 +2622,11 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
2622
2622
  // There are certain flags that we want to use internally but don't want to
2623
2623
  // expose because they are not relevant beyond parsing. Therefore we'll define
2624
2624
  // them here and not define them in config.yml/a header file.
2625
- static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626
- static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627
- static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628
- static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2625
+ static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2626
+
2627
+ static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2628
+ static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2629
+ static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2629
2630
 
2630
2631
  /**
2631
2632
  * Allocate and initialize a new CallNode node. This sets everything to NULL or
@@ -2976,7 +2977,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2976
2977
  */
2977
2978
  static void
2978
2979
  pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979
- if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2980
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2980
2981
  if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981
2982
  pm_node_t *node;
2982
2983
  PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
@@ -3874,7 +3875,7 @@ pm_def_node_create(
3874
3875
  end = end_keyword->end;
3875
3876
  }
3876
3877
 
3877
- if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3878
+ if (receiver != NULL) {
3878
3879
  pm_def_node_receiver_check(parser, receiver);
3879
3880
  }
3880
3881
 
@@ -4253,7 +4254,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4253
4254
  const uint8_t *point = memchr(start, '.', length);
4254
4255
  assert(point && "should have a decimal point");
4255
4256
 
4256
- uint8_t *digits = malloc(length);
4257
+ uint8_t *digits = xmalloc(length);
4257
4258
  if (digits == NULL) {
4258
4259
  fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259
4260
  abort();
@@ -4266,7 +4267,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4266
4267
  digits[0] = '1';
4267
4268
  if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268
4269
  pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269
- free(digits);
4270
+ xfree(digits);
4270
4271
 
4271
4272
  pm_integers_reduce(&node->numerator, &node->denominator);
4272
4273
  return node;
@@ -5279,6 +5280,12 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
5279
5280
 
5280
5281
  switch (PM_NODE_TYPE(part)) {
5281
5282
  case PM_STRING_NODE:
5283
+ // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5284
+ // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5285
+ // as long as this interpolation only consists of other string literals.
5286
+ if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5287
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5288
+ }
5282
5289
  part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5283
5290
  break;
5284
5291
  case PM_INTERPOLATED_STRING_NODE:
@@ -8582,85 +8589,66 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8582
8589
  /* Context manipulations */
8583
8590
  /******************************************************************************/
8584
8591
 
8585
- static bool
8586
- context_terminator(pm_context_t context, pm_token_t *token) {
8587
- switch (context) {
8588
- case PM_CONTEXT_MAIN:
8589
- case PM_CONTEXT_DEF_PARAMS:
8590
- case PM_CONTEXT_DEFINED:
8591
- case PM_CONTEXT_MULTI_TARGET:
8592
- case PM_CONTEXT_TERNARY:
8593
- case PM_CONTEXT_RESCUE_MODIFIER:
8594
- return token->type == PM_TOKEN_EOF;
8595
- case PM_CONTEXT_DEFAULT_PARAMS:
8596
- return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8597
- case PM_CONTEXT_PREEXE:
8598
- case PM_CONTEXT_POSTEXE:
8599
- return token->type == PM_TOKEN_BRACE_RIGHT;
8600
- case PM_CONTEXT_MODULE:
8601
- case PM_CONTEXT_CLASS:
8602
- case PM_CONTEXT_SCLASS:
8603
- case PM_CONTEXT_LAMBDA_DO_END:
8604
- case PM_CONTEXT_DEF:
8605
- case PM_CONTEXT_BLOCK_KEYWORDS:
8606
- return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8607
- case PM_CONTEXT_WHILE:
8608
- case PM_CONTEXT_UNTIL:
8609
- case PM_CONTEXT_ELSE:
8610
- case PM_CONTEXT_FOR:
8611
- case PM_CONTEXT_BEGIN_ENSURE:
8612
- case PM_CONTEXT_BLOCK_ENSURE:
8613
- case PM_CONTEXT_CLASS_ENSURE:
8614
- case PM_CONTEXT_DEF_ENSURE:
8615
- case PM_CONTEXT_LAMBDA_ENSURE:
8616
- case PM_CONTEXT_MODULE_ENSURE:
8617
- case PM_CONTEXT_SCLASS_ENSURE:
8618
- return token->type == PM_TOKEN_KEYWORD_END;
8619
- case PM_CONTEXT_LOOP_PREDICATE:
8620
- return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8621
- case PM_CONTEXT_FOR_INDEX:
8622
- return token->type == PM_TOKEN_KEYWORD_IN;
8623
- case PM_CONTEXT_CASE_WHEN:
8624
- return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8625
- case PM_CONTEXT_CASE_IN:
8626
- return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8627
- case PM_CONTEXT_IF:
8628
- case PM_CONTEXT_ELSIF:
8629
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8630
- case PM_CONTEXT_UNLESS:
8631
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8632
- case PM_CONTEXT_EMBEXPR:
8633
- return token->type == PM_TOKEN_EMBEXPR_END;
8634
- case PM_CONTEXT_BLOCK_BRACES:
8635
- return token->type == PM_TOKEN_BRACE_RIGHT;
8636
- case PM_CONTEXT_PARENS:
8637
- return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8638
- case PM_CONTEXT_BEGIN:
8639
- case PM_CONTEXT_BEGIN_RESCUE:
8640
- case PM_CONTEXT_BLOCK_RESCUE:
8641
- case PM_CONTEXT_CLASS_RESCUE:
8642
- case PM_CONTEXT_DEF_RESCUE:
8643
- case PM_CONTEXT_LAMBDA_RESCUE:
8644
- case PM_CONTEXT_MODULE_RESCUE:
8645
- case PM_CONTEXT_SCLASS_RESCUE:
8646
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8647
- case PM_CONTEXT_BEGIN_ELSE:
8648
- case PM_CONTEXT_BLOCK_ELSE:
8649
- case PM_CONTEXT_CLASS_ELSE:
8650
- case PM_CONTEXT_DEF_ELSE:
8651
- case PM_CONTEXT_LAMBDA_ELSE:
8652
- case PM_CONTEXT_MODULE_ELSE:
8653
- case PM_CONTEXT_SCLASS_ELSE:
8654
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8655
- case PM_CONTEXT_LAMBDA_BRACES:
8656
- return token->type == PM_TOKEN_BRACE_RIGHT;
8657
- case PM_CONTEXT_PREDICATE:
8658
- return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8659
- case PM_CONTEXT_NONE:
8660
- return false;
8661
- }
8592
+ static const uint32_t context_terminators[] = {
8593
+ [PM_CONTEXT_NONE] = 0,
8594
+ [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8595
+ [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8596
+ [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8597
+ [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8598
+ [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
8599
+ [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8600
+ [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8601
+ [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8602
+ [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8603
+ [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
8604
+ [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
8605
+ [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8606
+ [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8607
+ [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8608
+ [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8609
+ [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8610
+ [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8611
+ [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8612
+ [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8613
+ [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
8614
+ [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
8615
+ [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
8616
+ [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
8617
+ [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
8618
+ [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
8619
+ [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
8620
+ [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
8621
+ [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
8622
+ [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
8623
+ [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8624
+ [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8625
+ [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8626
+ [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8627
+ [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
8628
+ [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
8629
+ [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8630
+ [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8631
+ [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8632
+ [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8633
+ [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
8634
+ [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
8635
+ [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
8636
+ [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
8637
+ [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
8638
+ [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
8639
+ [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8640
+ [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8641
+ [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8642
+ [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8643
+ [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
8644
+ [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8645
+ [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
8646
+ [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
8647
+ };
8662
8648
 
8663
- return false;
8649
+ static inline bool
8650
+ context_terminator(pm_context_t context, pm_token_t *token) {
8651
+ return token->type < 32 && (context_terminators[context] & (1U << token->type));
8664
8652
  }
8665
8653
 
8666
8654
  /**
@@ -9109,7 +9097,7 @@ lex_global_variable(pm_parser_t *parser) {
9109
9097
  } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9110
9098
 
9111
9099
  // $0 isn't allowed to be followed by anything.
9112
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9100
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9113
9101
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9114
9102
  }
9115
9103
 
@@ -9146,7 +9134,7 @@ lex_global_variable(pm_parser_t *parser) {
9146
9134
  } else {
9147
9135
  // If we get here, then we have a $ followed by something that
9148
9136
  // isn't recognized as a global variable.
9149
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9137
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9150
9138
  const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9151
9139
  PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9152
9140
  }
@@ -10173,7 +10161,7 @@ lex_at_variable(pm_parser_t *parser) {
10173
10161
  }
10174
10162
  } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10175
10163
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10176
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10164
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
10177
10165
  diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10178
10166
  }
10179
10167
 
@@ -10849,14 +10837,37 @@ parser_lex(pm_parser_t *parser) {
10849
10837
  following = next_newline(following, parser->end - following);
10850
10838
  }
10851
10839
 
10852
- // If the lex state was ignored, or we hit a '.' or a '&.',
10853
- // we will lex the ignored newline
10840
+ // If the lex state was ignored, we will lex the
10841
+ // ignored newline.
10842
+ if (lex_state_ignored_p(parser)) {
10843
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10844
+ lexed_comment = false;
10845
+ goto lex_next_token;
10846
+ }
10847
+
10848
+ // If we hit a '.' or a '&.' we will lex the ignored
10849
+ // newline.
10850
+ if (following && (
10851
+ (peek_at(parser, following) == '.') ||
10852
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10853
+ )) {
10854
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10855
+ lexed_comment = false;
10856
+ goto lex_next_token;
10857
+ }
10858
+
10859
+
10860
+ // If we are parsing as CRuby 3.5 or later and we
10861
+ // hit a '&&' or a '||' then we will lex the ignored
10862
+ // newline.
10854
10863
  if (
10855
- lex_state_ignored_p(parser) ||
10856
- (following && (
10857
- (peek_at(parser, following) == '.') ||
10858
- (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10859
- ))
10864
+ (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) &&
10865
+ following && (
10866
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10867
+ (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10868
+ (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
10869
+ (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
10870
+ )
10860
10871
  ) {
10861
10872
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10862
10873
  lexed_comment = false;
@@ -10896,6 +10907,63 @@ parser_lex(pm_parser_t *parser) {
10896
10907
  parser->next_start = NULL;
10897
10908
  LEX(PM_TOKEN_AMPERSAND_DOT);
10898
10909
  }
10910
+
10911
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
10912
+ // If we hit an && then we are in a logical chain
10913
+ // and we need to return the logical operator.
10914
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10915
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10916
+ lex_state_set(parser, PM_LEX_STATE_BEG);
10917
+ parser->current.start = next_content;
10918
+ parser->current.end = next_content + 2;
10919
+ parser->next_start = NULL;
10920
+ LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10921
+ }
10922
+
10923
+ // If we hit a || then we are in a logical chain and
10924
+ // we need to return the logical operator.
10925
+ if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10926
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10927
+ lex_state_set(parser, PM_LEX_STATE_BEG);
10928
+ parser->current.start = next_content;
10929
+ parser->current.end = next_content + 2;
10930
+ parser->next_start = NULL;
10931
+ LEX(PM_TOKEN_PIPE_PIPE);
10932
+ }
10933
+
10934
+ // If we hit an 'and' then we are in a logical chain
10935
+ // and we need to return the logical operator.
10936
+ if (
10937
+ peek_at(parser, next_content) == 'a' &&
10938
+ peek_at(parser, next_content + 1) == 'n' &&
10939
+ peek_at(parser, next_content + 2) == 'd' &&
10940
+ !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10941
+ ) {
10942
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10943
+ lex_state_set(parser, PM_LEX_STATE_BEG);
10944
+ parser->current.start = next_content;
10945
+ parser->current.end = next_content + 3;
10946
+ parser->next_start = NULL;
10947
+ parser->command_start = true;
10948
+ LEX(PM_TOKEN_KEYWORD_AND);
10949
+ }
10950
+
10951
+ // If we hit a 'or' then we are in a logical chain
10952
+ // and we need to return the logical operator.
10953
+ if (
10954
+ peek_at(parser, next_content) == 'o' &&
10955
+ peek_at(parser, next_content + 1) == 'r' &&
10956
+ !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10957
+ ) {
10958
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
10959
+ lex_state_set(parser, PM_LEX_STATE_BEG);
10960
+ parser->current.start = next_content;
10961
+ parser->current.end = next_content + 2;
10962
+ parser->next_start = NULL;
10963
+ parser->command_start = true;
10964
+ LEX(PM_TOKEN_KEYWORD_OR);
10965
+ }
10966
+ }
10899
10967
  }
10900
10968
 
10901
10969
  // At this point we know this is a regular newline, and we can set the
@@ -13142,14 +13210,6 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13142
13210
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13143
13211
  }
13144
13212
 
13145
- /**
13146
- * Returns true if the current token is any of the nine given types.
13147
- */
13148
- static inline bool
13149
- match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13150
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13151
- }
13152
-
13153
13213
  /**
13154
13214
  * If the current token is of the specified type, lex forward by one token and
13155
13215
  * return true. Otherwise, return false. For example:
@@ -14390,6 +14450,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14390
14450
  if (accepted_newline) {
14391
14451
  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14392
14452
  }
14453
+
14454
+ // If this is a command call and an argument takes a block,
14455
+ // there can be no further arguments. For example,
14456
+ // `foo(bar 1 do end, 2)` should be rejected.
14457
+ if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
14458
+ pm_call_node_t *call = (pm_call_node_t *) argument;
14459
+ if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
14460
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14461
+ break;
14462
+ }
14463
+ }
14393
14464
  } else {
14394
14465
  // If there is no comma at the end of the argument list then we're
14395
14466
  // done parsing arguments and can break out of this loop.
@@ -14541,6 +14612,18 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
14541
14612
  return true;
14542
14613
  }
14543
14614
 
14615
+ /**
14616
+ * Ensures that after parsing a parameter, the next token is not `=`.
14617
+ * Some parameters like `def(* = 1)` cannot become optional. When no parens
14618
+ * are present like in `def * = 1`, this creates ambiguity with endless method definitions.
14619
+ */
14620
+ static inline void
14621
+ refute_optional_parameter(pm_parser_t *parser) {
14622
+ if (match1(parser, PM_TOKEN_EQUAL)) {
14623
+ pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
14624
+ }
14625
+ }
14626
+
14544
14627
  /**
14545
14628
  * Parse a list of parameters on a method definition.
14546
14629
  */
@@ -14593,6 +14676,10 @@ parse_parameters(
14593
14676
  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14594
14677
  }
14595
14678
 
14679
+ if (!uses_parentheses) {
14680
+ refute_optional_parameter(parser);
14681
+ }
14682
+
14596
14683
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14597
14684
  if (repeated) {
14598
14685
  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
@@ -14614,6 +14701,10 @@ parse_parameters(
14614
14701
  bool succeeded = update_parameter_state(parser, &parser->current, &order);
14615
14702
  parser_lex(parser);
14616
14703
 
14704
+ if (!uses_parentheses) {
14705
+ refute_optional_parameter(parser);
14706
+ }
14707
+
14617
14708
  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14618
14709
  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14619
14710
 
@@ -14671,7 +14762,7 @@ parse_parameters(
14671
14762
  parser_lex(parser);
14672
14763
 
14673
14764
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14674
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14765
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14675
14766
 
14676
14767
  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14677
14768
  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
@@ -14687,7 +14778,7 @@ parse_parameters(
14687
14778
  // If the value of the parameter increased the number of
14688
14779
  // reads of that parameter, then we need to warn that we
14689
14780
  // have a circular definition.
14690
- if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14781
+ if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14691
14782
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14692
14783
  }
14693
14784
 
@@ -14772,13 +14863,13 @@ parse_parameters(
14772
14863
 
14773
14864
  if (token_begins_expression_p(parser->current.type)) {
14774
14865
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14775
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14866
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14776
14867
 
14777
14868
  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14778
14869
  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14779
14870
  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14780
14871
 
14781
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14872
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14782
14873
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14783
14874
  }
14784
14875
 
@@ -14795,6 +14886,10 @@ parse_parameters(
14795
14886
  context_pop(parser);
14796
14887
  pm_parameters_node_keywords_append(params, param);
14797
14888
 
14889
+ if (!uses_parentheses) {
14890
+ refute_optional_parameter(parser);
14891
+ }
14892
+
14798
14893
  // If parsing the value of the parameter resulted in error recovery,
14799
14894
  // then we can put a missing node in its place and stop parsing the
14800
14895
  // parameters entirely now.
@@ -14826,6 +14921,10 @@ parse_parameters(
14826
14921
  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14827
14922
  }
14828
14923
 
14924
+ if (!uses_parentheses) {
14925
+ refute_optional_parameter(parser);
14926
+ }
14927
+
14829
14928
  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14830
14929
  if (repeated) {
14831
14930
  pm_node_flag_set_repeated_parameter(param);
@@ -14874,6 +14973,10 @@ parse_parameters(
14874
14973
  }
14875
14974
  }
14876
14975
 
14976
+ if (!uses_parentheses) {
14977
+ refute_optional_parameter(parser);
14978
+ }
14979
+
14877
14980
  if (params->keyword_rest == NULL) {
14878
14981
  pm_parameters_node_keyword_rest_set(params, param);
14879
14982
  } else {
@@ -16482,7 +16585,7 @@ parse_variable(pm_parser_t *parser) {
16482
16585
  pm_node_list_append(&current_scope->implicit_parameters, node);
16483
16586
 
16484
16587
  return node;
16485
- } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16588
+ } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16486
16589
  pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16487
16590
  pm_node_list_append(&current_scope->implicit_parameters, node);
16488
16591
 
@@ -17412,6 +17515,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
17412
17515
  // If we found a label, we need to immediately return to the caller.
17413
17516
  if (pm_symbol_node_label_p(node)) return node;
17414
17517
 
17518
+ // Call nodes (arithmetic operations) are not allowed in patterns
17519
+ if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17520
+ pm_parser_err_node(parser, node, diag_id);
17521
+ pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17522
+ pm_node_destroy(parser, node);
17523
+ return (pm_node_t *) missing_node;
17524
+ }
17525
+
17415
17526
  // Now that we have a primitive, we need to check if it's part of a range.
17416
17527
  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17417
17528
  pm_token_t operator = parser->previous;
@@ -17694,7 +17805,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17694
17805
  // Gather up all of the patterns into the list.
17695
17806
  while (accept1(parser, PM_TOKEN_COMMA)) {
17696
17807
  // Break early here in case we have a trailing comma.
17697
- if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17808
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17698
17809
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17699
17810
  pm_node_list_append(&nodes, node);
17700
17811
  trailing_rest = true;
@@ -18430,20 +18541,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18430
18541
  return (pm_node_t *) node;
18431
18542
  }
18432
18543
  case PM_TOKEN_CHARACTER_LITERAL: {
18433
- parser_lex(parser);
18434
-
18435
- pm_token_t opening = parser->previous;
18436
- opening.type = PM_TOKEN_STRING_BEGIN;
18437
- opening.end = opening.start + 1;
18438
-
18439
- pm_token_t content = parser->previous;
18440
- content.type = PM_TOKEN_STRING_CONTENT;
18441
- content.start = content.start + 1;
18442
-
18443
18544
  pm_token_t closing = not_provided(parser);
18444
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18545
+ pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(
18546
+ parser,
18547
+ &(pm_token_t) {
18548
+ .type = PM_TOKEN_STRING_BEGIN,
18549
+ .start = parser->current.start,
18550
+ .end = parser->current.start + 1
18551
+ },
18552
+ &(pm_token_t) {
18553
+ .type = PM_TOKEN_STRING_CONTENT,
18554
+ .start = parser->current.start + 1,
18555
+ .end = parser->current.end
18556
+ },
18557
+ &closing
18558
+ );
18559
+
18445
18560
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
18446
18561
 
18562
+ // Skip past the character literal here, since now we have handled
18563
+ // parser->explicit_encoding correctly.
18564
+ parser_lex(parser);
18565
+
18447
18566
  // Characters can be followed by strings in which case they are
18448
18567
  // automatically concatenated.
18449
18568
  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -18585,17 +18704,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18585
18704
  call->closing_loc = arguments.closing_loc;
18586
18705
  call->block = arguments.block;
18587
18706
 
18588
- if (arguments.block != NULL) {
18589
- call->base.location.end = arguments.block->location.end;
18590
- } else if (arguments.closing_loc.start == NULL) {
18591
- if (arguments.arguments != NULL) {
18592
- call->base.location.end = arguments.arguments->base.location.end;
18593
- } else {
18594
- call->base.location.end = call->message_loc.end;
18595
- }
18596
- } else {
18597
- call->base.location.end = arguments.closing_loc.end;
18707
+ const uint8_t *end = pm_arguments_end(&arguments);
18708
+ if (!end) {
18709
+ end = call->message_loc.end;
18598
18710
  }
18711
+ call->base.location.end = end;
18599
18712
  }
18600
18713
  } else {
18601
18714
  // Otherwise, we know the identifier is in the local table. This
@@ -19123,7 +19236,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19123
19236
  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19124
19237
 
19125
19238
  if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19239
+ pm_token_t next = parser->current;
19126
19240
  parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19241
+
19242
+ // Reject `foo && return bar`.
19243
+ if (!accepts_command_call && arguments.arguments != NULL) {
19244
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
19245
+ }
19127
19246
  }
19128
19247
  }
19129
19248
 
@@ -19520,7 +19639,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19520
19639
  pm_do_loop_stack_push(parser, false);
19521
19640
  statements = (pm_node_t *) pm_statements_node_create(parser);
19522
19641
 
19523
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19642
+ bool allow_command_call;
19643
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
19644
+ allow_command_call = accepts_command_call;
19645
+ } else {
19646
+ // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
19647
+ allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
19648
+ }
19649
+
19650
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19524
19651
 
19525
19652
  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19526
19653
  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
@@ -19607,18 +19734,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19607
19734
  pm_token_t lparen;
19608
19735
  pm_token_t rparen;
19609
19736
  pm_node_t *expression;
19737
+
19610
19738
  context_push(parser, PM_CONTEXT_DEFINED);
19739
+ bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19611
19740
 
19612
19741
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19613
19742
  lparen = parser->previous;
19614
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19615
19743
 
19616
- if (parser->recovering) {
19744
+ if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19745
+ expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19746
+ lparen = not_provided(parser);
19617
19747
  rparen = not_provided(parser);
19618
19748
  } else {
19619
- accept1(parser, PM_TOKEN_NEWLINE);
19620
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19621
- rparen = parser->previous;
19749
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19750
+
19751
+ if (parser->recovering) {
19752
+ rparen = not_provided(parser);
19753
+ } else {
19754
+ accept1(parser, PM_TOKEN_NEWLINE);
19755
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19756
+ rparen = parser->previous;
19757
+ }
19622
19758
  }
19623
19759
  } else {
19624
19760
  lparen = not_provided(parser);
@@ -19766,6 +19902,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19766
19902
  pm_arguments_t arguments = { 0 };
19767
19903
  pm_node_t *receiver = NULL;
19768
19904
 
19905
+ // If we do not accept a command call, then we also do not accept a
19906
+ // not without parentheses. In this case we need to reject this
19907
+ // syntax.
19908
+ if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19909
+ if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19910
+ pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19911
+ } else {
19912
+ accept1(parser, PM_TOKEN_NEWLINE);
19913
+ pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19914
+ }
19915
+
19916
+ return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19917
+ }
19918
+
19769
19919
  accept1(parser, PM_TOKEN_NEWLINE);
19770
19920
 
19771
19921
  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
@@ -20809,7 +20959,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
20809
20959
  bool permitted = true;
20810
20960
  if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20811
20961
 
20812
- pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20962
+ pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20813
20963
  if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20814
20964
 
20815
20965
  parse_assignment_value_local(parser, value);
@@ -21167,6 +21317,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21167
21317
  }
21168
21318
  PRISM_FALLTHROUGH
21169
21319
  case PM_CASE_WRITABLE: {
21320
+ // When we have `it = value`, we need to add `it` as a local
21321
+ // variable before parsing the value, in case the value
21322
+ // references the variable.
21323
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21324
+ pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
21325
+ }
21326
+
21170
21327
  parser_lex(parser);
21171
21328
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21172
21329
 
@@ -22160,6 +22317,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
22160
22317
  ) {
22161
22318
  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22162
22319
 
22320
+ if (context_terminator(parser->current_context->context, &parser->current)) {
22321
+ // If this token terminates the current context, then we need to
22322
+ // stop parsing the expression, as it has become a statement.
22323
+ return node;
22324
+ }
22325
+
22163
22326
  switch (PM_NODE_TYPE(node)) {
22164
22327
  case PM_MULTI_WRITE_NODE:
22165
22328
  // Multi-write nodes are statements, and cannot be followed by
@@ -22393,9 +22556,10 @@ parse_program(pm_parser_t *parser) {
22393
22556
  statements = wrap_statements(parser, statements);
22394
22557
  } else {
22395
22558
  flush_block_exits(parser, previous_block_exits);
22396
- pm_node_list_free(&current_block_exits);
22397
22559
  }
22398
22560
 
22561
+ pm_node_list_free(&current_block_exits);
22562
+
22399
22563
  // If this is an empty file, then we're still going to parse all of the
22400
22564
  // statements in order to gather up all of the comments and such. Here we'll
22401
22565
  // correct the location information.
@@ -22614,6 +22778,12 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
22614
22778
  }
22615
22779
  }
22616
22780
 
22781
+ // Now that we have established the user-provided options, check if
22782
+ // a version was given and parse as the latest version otherwise.
22783
+ if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22784
+ parser->version = PM_OPTIONS_VERSION_LATEST;
22785
+ }
22786
+
22617
22787
  pm_accepts_block_stack_push(parser, true);
22618
22788
 
22619
22789
  // Skip past the UTF-8 BOM if it exists.
@@ -22667,7 +22837,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
22667
22837
  }
22668
22838
 
22669
22839
  search_shebang = false;
22670
- } else if (options->main_script && !parser->parsing_eval) {
22840
+ } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22671
22841
  search_shebang = true;
22672
22842
  }
22673
22843
  }
@@ -22807,7 +22977,7 @@ pm_parse(pm_parser_t *parser) {
22807
22977
  * otherwise return true.
22808
22978
  */
22809
22979
  static bool
22810
- pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
22980
+ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22811
22981
  #define LINE_SIZE 4096
22812
22982
  char line[LINE_SIZE];
22813
22983
 
@@ -22843,6 +23013,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t
22843
23013
  if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22844
23014
  break;
22845
23015
  }
23016
+
23017
+ // All data should be read via gets. If the string returned by gets
23018
+ // _doesn't_ end with a newline, then we assume we hit EOF condition.
23019
+ if (stream_feof(stream)) {
23020
+ break;
23021
+ }
22846
23022
  }
22847
23023
 
22848
23024
  return true;
@@ -22878,16 +23054,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22878
23054
  * can stream stdin in to Ruby so we need to support a streaming API.
22879
23055
  */
22880
23056
  PRISM_EXPORTED_FUNCTION pm_node_t *
22881
- pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
23057
+ pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22882
23058
  pm_buffer_init(buffer);
22883
23059
 
22884
- bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
23060
+ bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23061
+
22885
23062
  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22886
23063
  pm_node_t *node = pm_parse(parser);
22887
23064
 
22888
23065
  while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22889
23066
  pm_node_destroy(parser, node);
22890
- eof = pm_parse_stream_read(buffer, stream, stream_fgets);
23067
+ eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22891
23068
 
22892
23069
  pm_parser_free(parser);
22893
23070
  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
@@ -22979,13 +23156,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
22979
23156
  * given stream into to the given buffer.
22980
23157
  */
22981
23158
  PRISM_EXPORTED_FUNCTION void
22982
- pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
23159
+ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22983
23160
  pm_parser_t parser;
22984
23161
  pm_options_t options = { 0 };
22985
23162
  pm_options_read(&options, data);
22986
23163
 
22987
23164
  pm_buffer_t parser_buffer;
22988
- pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
23165
+ pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22989
23166
  pm_serialize_header(buffer);
22990
23167
  pm_serialize_content(&parser, node, buffer);
22991
23168
  pm_buffer_append_byte(buffer, '\0');