RubyGems - prism - Versions diffs - 1.2.0 → 1.3.0 - Mend

prism 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +23 -1
data/Makefile +1 -1
data/config.yml +420 -2
data/docs/build_system.md +8 -11
data/docs/relocation.md +34 -0
data/ext/prism/api_node.c +18 -10
data/ext/prism/extconf.rb +13 -36
data/ext/prism/extension.c +68 -0
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +426 -2
data/include/prism/defines.h +22 -7
data/include/prism/version.h +2 -2
data/include/prism.h +47 -0
data/lib/prism/dot_visitor.rb +10 -0
data/lib/prism/dsl.rb +4 -4
data/lib/prism/ffi.rb +49 -2
data/lib/prism/inspect_visitor.rb +2 -0
data/lib/prism/node.rb +1838 -95
data/lib/prism/parse_result/errors.rb +1 -1
data/lib/prism/parse_result.rb +2 -2
data/lib/prism/reflection.rb +2 -2
data/lib/prism/relocation.rb +504 -0
data/lib/prism/serialize.rb +5 -5
data/lib/prism/string_query.rb +30 -0
data/lib/prism/translation/parser/compiler.rb +36 -26
data/lib/prism/translation/ruby_parser.rb +12 -3
data/lib/prism.rb +6 -4
data/prism.gemspec +7 -1
data/rbi/prism/dsl.rbi +4 -4
data/rbi/prism/node.rbi +22 -10
data/rbi/prism/string_query.rbi +12 -0
data/sig/prism/dsl.rbs +2 -2
data/sig/prism/node.rbs +12 -8
data/sig/prism/relocation.rbs +185 -0
data/sig/prism/string_query.rbs +11 -0
data/src/node.c +18 -0
data/src/prettyprint.c +32 -0
data/src/prism.c +364 -81
data/src/regexp.c +7 -3
data/src/serialize.c +12 -0
data/src/static_literals.c +1 -1
data/src/util/pm_char.c +1 -1
data/src/util/pm_string.c +1 -0
metadata +9 -3

data/src/prism.c CHANGED

@@ -4142,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
     // If errno is set, then it should only be ERANGE. At this point we need to
     // check if it's infinity (it should be).
-    if (errno == ERANGE && isinf(value)) {
+    if (errno == ERANGE && PRISM_ISINF(value)) {
         int warn_width;
         const char *ellipsis;
@@ -7684,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
  * Allocate a new UntilNode node.
  */
 static pm_until_node_t *
-pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
     pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
@@ -7699,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
             },
         },
         .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
         .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
         .predicate = predicate,
         .statements = statements
@@ -7727,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
             },
         },
         .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
         .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
         .predicate = predicate,
         .statements = statements
@@ -7794,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
  * Allocate a new WhileNode node.
  */
 static pm_while_node_t *
-pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
     pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
@@ -7809,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
             },
         },
         .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
         .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
         .predicate = predicate,
         .statements = statements
@@ -7837,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
             },
         },
         .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
         .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
         .predicate = predicate,
         .statements = statements
@@ -7859,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
             .location = PM_LOCATION_NULL_VALUE(parser)
         },
         .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+        .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
         .closing_loc = PM_LOCATION_NULL_VALUE(parser),
         .predicate = predicate,
         .statements = statements
@@ -9105,7 +9110,7 @@ lex_global_variable(pm_parser_t *parser) {
         case '-':
             parser->current.end++;
             allow_multiple = false;
-            /* fallthrough */
+            PRISM_FALLTHROUGH
         default: {
             size_t width;
@@ -10041,8 +10046,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
                 return;
             }
+            PRISM_FALLTHROUGH
         }
-        /* fallthrough */
         default: {
             if (parser->current.end < parser->end) {
                 escape_write_escape_encoded(parser, buffer);
@@ -10496,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
     }
     const uint8_t *end = parser->current.end - 1;
+    assert(end >= start);
     pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
     token_buffer->cursor = end;
@@ -10576,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
             pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
         }
-        const uint8_t delimiter = *parser->current.end;
-        parser->current.end += eol_length;
+        uint8_t delimiter = *parser->current.end;
+        // If our delimiter is \r\n, we want to treat it as if it's \n.
+        // For example, %\r\nfoo\r\n should be "foo"
+        if (eol_length == 2) {
+            delimiter = *(parser->current.end + 1);
+        }
+        parser->current.end += eol_length;
         return delimiter;
     }
@@ -10688,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
             // We'll check if we're at the end of the file. If we are, then we
             // need to return the EOF token.
             if (parser->current.end >= parser->end) {
+                // If we hit EOF, but the EOF came immediately after a newline,
+                // set the start of the token to the newline.  This way any EOF
+                // errors will be reported as happening on that line rather than
+                // a line after.  For example "foo(\n" should report an error
+                // on line 1 even though EOF technically occurs on line 2.
+                if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
+                    parser->current.start -= 1;
+                }
                 LEX(PM_TOKEN_EOF);
             }
@@ -10730,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
                     lexed_comment = true;
                 }
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 case '\r':
                 case '\n': {
                     parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10772,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
                                 parser->current.type = PM_TOKEN_NEWLINE;
                                 return;
                             }
-                            /* fallthrough */
+                            PRISM_FALLTHROUGH
                         case PM_IGNORED_NEWLINE_ALL:
                             if (!lexed_comment) parser_lex_ignored_newline(parser);
                             lexed_comment = false;
@@ -10869,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
                 // ,
                 case ',':
+                    if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+                    }
                     lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
                     LEX(PM_TOKEN_COMMA);
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
                                             PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
                                             break;
                                         }
-                                        /* fallthrough */
+                                        PRISM_FALLTHROUGH
                                     default:
                                         PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
                                         break;
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
                                 pm_token_buffer_push_byte(&token_buffer, '\r');
                                 break;
                             }
-                        /* fallthrough */
+                        PRISM_FALLTHROUGH
                         case '\n':
                             pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
             pm_regexp_token_buffer_t token_buffer = { 0 };
             while (breakpoint != NULL) {
+                uint8_t term = lex_mode->as.regexp.terminator;
+                bool is_terminator = (*breakpoint == term);
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%r\nfoo\r\n`
+                // The regexp should be /foo/, not /foo\r/
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
                 // If we hit the terminator, we need to determine what kind of
                 // token to return.
-                if (*breakpoint == lex_mode->as.regexp.terminator) {
+                if (is_terminator) {
                     if (lex_mode->as.regexp.nesting > 0) {
                         parser->current.end = breakpoint + 1;
                         breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
                         pm_regexp_token_buffer_escape(parser, &token_buffer);
                         token_buffer.base.cursor = breakpoint;
-                        /* fallthrough */
+                        PRISM_FALLTHROUGH
                     case '\n':
                         // If we've hit a newline, then we need to track that in
                         // the list of newlines.
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
                                     pm_token_buffer_push_byte(&token_buffer.base, '\r');
                                     break;
                                 }
-                            /* fallthrough */
+                            PRISM_FALLTHROUGH
                             case '\n':
                                 if (parser->heredoc_end) {
                                     // ... if we are on the same line as a heredoc,
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
                     continue;
                 }
+                uint8_t term = lex_mode->as.string.terminator;
+                bool is_terminator = (*breakpoint == term);
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%\nfoo\r\n`
+                // The string should be "foo", not "foo\r"
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
                 // Note that we have to check the terminator here first because we could
                 // potentially be parsing a % string that has a # character as the
                 // terminator.
-                if (*breakpoint == lex_mode->as.string.terminator) {
+                if (is_terminator) {
                     // If this terminator doesn't actually close the string, then we need
                     // to continue on past it.
                     if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
                         pm_token_buffer_escape(parser, &token_buffer);
                         token_buffer.cursor = breakpoint;
-                        /* fallthrough */
+                        PRISM_FALLTHROUGH
                     case '\n':
                         // When we hit a newline, we need to flush any potential
                         // heredocs. Note that this has to happen after we check
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
                                     pm_token_buffer_push_byte(&token_buffer, '\r');
                                     break;
                                 }
-                            /* fallthrough */
+                            PRISM_FALLTHROUGH
                             case '\n':
                                 if (!lex_mode->as.string.interpolation) {
                                     pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12632,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
                         pm_token_buffer_escape(parser, &token_buffer);
                         token_buffer.cursor = breakpoint;
-                        /* fallthrough */
+                        PRISM_FALLTHROUGH
                     case '\n': {
                         if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
                             parser_flush_heredoc_end(parser);
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
                                         pm_token_buffer_push_byte(&token_buffer, '\r');
                                         break;
                                     }
-                                /* fallthrough */
+                                PRISM_FALLTHROUGH
                                 case '\n':
                                     pm_token_buffer_push_byte(&token_buffer, '\\');
                                     pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,7 +12814,7 @@ parser_lex(pm_parser_t *parser) {
                                         pm_token_buffer_push_byte(&token_buffer, '\r');
                                         break;
                                     }
-                                /* fallthrough */
+                                PRISM_FALLTHROUGH
                                 case '\n':
                                     // If we are in a tilde here, we should
                                     // break out of the loop and return the
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
 }
-/**
- * Returns true if the current token is any of the six given types.
- */
-static inline bool
-match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
-    return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
-}
 /**
  * Returns true if the current token is any of the seven given types.
  */
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
 }
+/**
+ * Returns true if the current token is any of the nine given types.
+ */
+static inline bool
+match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
+    return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
+}
 /**
  * If the current token is of the specified type, lex forward by one token and
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
     return false;
 }
-/**
- * If the current token is any of the three given types, lex forward by one
- * token and return true. Otherwise return false.
- */
-static inline bool
-accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
-    if (match3(parser, type1, type2, type3)) {
-        parser_lex(parser);
-        return true;
-    }
-    return false;
-}
 /**
  * This function indicates that the parser expects a token in a specific
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,20 +13195,6 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
     parser->previous.type = PM_TOKEN_MISSING;
 }
-/**
- * This function is the same as expect2, but it expects one of three token types.
- */
-static void
-expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
-    if (accept3(parser, type1, type2, type3)) return;
-    const uint8_t *location = parser->previous.end;
-    pm_parser_err(parser, location, location, diag_id);
-    parser->previous.start = location;
-    parser->previous.type = PM_TOKEN_MISSING;
-}
 /**
  * A special expect1 that expects a heredoc terminator and handles popping the
  * lex mode accordingly.
@@ -13501,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
                 return (pm_node_t *) pm_index_target_node_create(parser, call);
             }
         }
-        /* fallthrough */
+        PRISM_FALLTHROUGH
         default:
             // In this case we have a node that we don't know how to convert
             // into a target. We need to treat it as an error. For now, we'll
@@ -13583,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
         case PM_BACK_REFERENCE_READ_NODE:
         case PM_NUMBERED_REFERENCE_READ_NODE:
             PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
-            /* fallthrough */
+            PRISM_FALLTHROUGH
         case PM_GLOBAL_VARIABLE_READ_NODE: {
             pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
             pm_node_destroy(parser, target);
@@ -13710,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                 // Replace the name with "[]=".
                 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
+                // Ensure that the arguments for []= don't contain keywords
+                pm_index_arguments_check(parser, call->arguments, call->block);
                 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
                 return target;
@@ -13722,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             // is no way for us to attach it to the tree at this point.
             pm_node_destroy(parser, value);
         }
-        /* fallthrough */
+        PRISM_FALLTHROUGH
         default:
             // In this case we have a node that we don't know how to convert into a
             // target. We need to treat it as an error. For now, we'll mark it as an
@@ -14191,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
                     pm_parser_scope_forwarding_positionals_check(parser, &operator);
                     argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
+                    if (parsed_bare_hash) {
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+                    }
                 } else {
                     pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
@@ -14239,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                     }
                 }
             }
-            /* fallthrough */
+            PRISM_FALLTHROUGH
             default: {
                 if (argument == NULL) {
                     argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14482,6 +14523,7 @@ parse_parameters(
     bool allows_trailing_comma,
     bool allows_forwarding_parameters,
     bool accepts_blocks_in_defaults,
+    bool in_block,
     uint16_t depth
 ) {
     pm_do_loop_stack_push(parser, false);
@@ -14646,7 +14688,7 @@ parse_parameters(
                 break;
             }
             case PM_TOKEN_LABEL: {
-                if (!uses_parentheses) parser->in_keyword_arg = true;
+                if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
                 update_parameter_state(parser, &parser->current, &order);
                 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -15115,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
                 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
                 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
                 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
-                default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
+                default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
             }
             else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15210,6 +15252,7 @@ parse_block_parameters(
             allows_trailing_comma,
             false,
             accepts_blocks_in_defaults,
+            true,
             (uint16_t) (depth + 1)
         );
     }
@@ -16125,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
         case PM_TOKEN_TILDE:
         case PM_TOKEN_BANG:
             if (name->end[-1] == '@') return name->end - 1;
-        /* fallthrough */
+        PRISM_FALLTHROUGH
         default:
             return name->end;
     }
@@ -16381,14 +16424,15 @@ static pm_node_t *
 parse_variable(pm_parser_t *parser) {
     pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
     int depth;
+    bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
-    if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
+    if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
         return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
     }
     pm_scope_t *current_scope = parser->current_scope;
     if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
-        if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
+        if (is_numbered_param) {
             // When you use a numbered parameter, it implies the existence of
             // all of the locals that exist before it. For example, referencing
             // _2 means that _1 must exist. Therefore here we loop through all
@@ -17096,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
                 break;
             }
         }
-        /* fallthrough */
+        PRISM_FALLTHROUGH
         default: {
             // If we get anything else, then this is an error. For this we'll
             // create a missing node for the value and create an assoc node for
@@ -17592,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
                 break;
             }
         }
-        /* fallthrough */
+        PRISM_FALLTHROUGH
         default:
             node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
             break;
@@ -17614,7 +17658,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
         // Gather up all of the patterns into the list.
         while (accept1(parser, PM_TOKEN_COMMA)) {
             // Break early here in case we have a trailing comma.
-            if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
+            if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
                 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
                 pm_node_list_append(&nodes, node);
                 trailing_rest = true;
@@ -18722,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
                     }
                 }
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 default:
                     return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
             }
@@ -19213,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             context_push(parser, PM_CONTEXT_DEF_PARAMS);
             parser_lex(parser);
+            // This will be false if the method name is not a valid identifier
+            // but could be followed by an operator.
+            bool valid_name = true;
             switch (parser->current.type) {
                 case PM_CASE_OPERATOR:
                     pm_parser_scope_push(parser, true);
@@ -19242,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     break;
                 }
-                case PM_TOKEN_CONSTANT:
                 case PM_TOKEN_INSTANCE_VARIABLE:
                 case PM_TOKEN_CLASS_VARIABLE:
                 case PM_TOKEN_GLOBAL_VARIABLE:
+                    valid_name = false;
+                    PRISM_FALLTHROUGH
+                case PM_TOKEN_CONSTANT:
                 case PM_TOKEN_KEYWORD_NIL:
                 case PM_TOKEN_KEYWORD_SELF:
                 case PM_TOKEN_KEYWORD_TRUE:
@@ -19303,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         name = parse_method_definition_name(parser);
                     } else {
+                        if (!valid_name) {
+                            PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
+                        }
                         name = identifier;
                     }
                     break;
@@ -19353,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
                         params = NULL;
                     } else {
-                        params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
+                        params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
                     }
                     lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19378,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     lparen = not_provided(parser);
                     rparen = not_provided(parser);
-                    params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
+                    params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
                     context_pop(parser);
                     break;
@@ -19787,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_do_loop_stack_pop(parser);
             context_pop(parser);
-            expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
-            pm_statements_node_t *statements = NULL;
+            pm_token_t do_keyword;
+            if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+                do_keyword = parser->previous;
+            } else {
+                do_keyword = not_provided(parser);
+                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
+            }
+            pm_statements_node_t *statements = NULL;
             if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
                 pm_accepts_block_stack_push(parser, true);
                 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19800,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
             expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
-            return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
+            return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
         }
         case PM_TOKEN_KEYWORD_WHILE: {
             size_t opening_newline_index = token_newline_index(parser);
@@ -19815,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_do_loop_stack_pop(parser);
             context_pop(parser);
-            expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
-            pm_statements_node_t *statements = NULL;
+            pm_token_t do_keyword;
+            if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+                do_keyword = parser->previous;
+            } else {
+                do_keyword = not_provided(parser);
+                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
+            }
+            pm_statements_node_t *statements = NULL;
             if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
                 pm_accepts_block_stack_push(parser, true);
                 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19828,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
             expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
-            return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
+            return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
         }
         case PM_TOKEN_PERCENT_LOWER_I: {
             parser_lex(parser);
@@ -20898,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                         pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
                     }
                 }
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 case PM_CASE_WRITABLE: {
                     parser_lex(parser);
                     pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20944,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
                     PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -21062,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
                     PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -21190,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
                     PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
-                /* fallthrough */
+                PRISM_FALLTHROUGH
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -21400,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
         case PM_TOKEN_STAR:
         case PM_TOKEN_STAR_STAR: {
             parser_lex(parser);
+            pm_token_t operator = parser->previous;
+            switch (PM_NODE_TYPE(node)) {
+                case PM_RESCUE_MODIFIER_NODE: {
+                    pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_AND_NODE: {
+                    pm_and_node_t *cast = (pm_and_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_OR_NODE: {
+                    pm_or_node_t *cast = (pm_or_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
             pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
             return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
         }
@@ -21427,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
             }
+            switch (PM_NODE_TYPE(node)) {
+                case PM_RESCUE_MODIFIER_NODE: {
+                    pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_AND_NODE: {
+                    pm_and_node_t *cast = (pm_and_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_OR_NODE: {
+                    pm_or_node_t *cast = (pm_or_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
             pm_token_t message;
             switch (parser->current.type) {
@@ -21774,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
             if (pm_symbol_node_label_p(node)) {
                 return node;
             }
+            break;
         default:
             break;
     }
@@ -22643,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
 }
 #endif
+/******************************************************************************/
+/* Slice queries for the Ruby API                                             */
+/******************************************************************************/
+/**
+ * The classification that pm_slice_type assigns to a slice of source. The
+ * enumerator values are written out so the numbering is visible at a glance.
+ */
+typedef enum {
+    /** The encoding name passed alongside the slice could not be resolved. */
+    PM_SLICE_TYPE_ERROR = -1,
+    /** The slice does not fall into any of the categories below. */
+    PM_SLICE_TYPE_NONE = 0,
+    /** The slice is a valid local variable name. */
+    PM_SLICE_TYPE_LOCAL = 1,
+    /** The slice is a valid constant name. */
+    PM_SLICE_TYPE_CONSTANT = 2,
+    /** The slice is a valid method name. */
+    PM_SLICE_TYPE_METHOD_NAME = 3
+} pm_slice_type_t;
+/**
+ * Classify a slice of source as a local variable name, a constant name, a
+ * method name, or none of those.
+ *
+ * @param source A pointer to the first byte of the slice.
+ * @param length The number of bytes in the slice. The slice is not required
+ *     to be NUL-terminated: the trailing suffix check is bounded by length.
+ * @param encoding_name The NUL-terminated name of the encoding of the slice.
+ * @return PM_SLICE_TYPE_ERROR if the encoding name cannot be resolved,
+ *     PM_SLICE_TYPE_NONE if the slice is empty or is not an identifier,
+ *     PM_SLICE_TYPE_METHOD_NAME if the identifier ends in !, ?, or =,
+ *     PM_SLICE_TYPE_CONSTANT if its first character is uppercase, and
+ *     PM_SLICE_TYPE_LOCAL otherwise.
+ */
+pm_slice_type_t
+pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
+    // first, get the right encoding object
+    const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
+    if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
+    // check that there is at least one character
+    if (length == 0) return PM_SLICE_TYPE_NONE;
+    // the first character must be alphabetical, an underscore, or a multibyte
+    // character; width records how many bytes it occupies
+    size_t width;
+    if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
+        // valid because alphabetical
+    } else if (*source == '_') {
+        // valid because underscore
+        width = 1;
+    } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
+        // valid because multibyte
+    } else {
+        // invalid because no match
+        return PM_SLICE_TYPE_NONE;
+    }
+    // determine the type of the slice based on the first character
+    const uint8_t *end = source + length;
+    pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
+    // next, iterate through all of the bytes of the string to ensure that they
+    // are all valid identifier characters
+    source += width;
+    while (source < end) {
+        if ((width = encoding->alnum_char(source, end - source)) != 0) {
+            // valid because alphanumeric
+            source += width;
+        } else if (*source == '_') {
+            // valid because underscore
+            source++;
+        } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
+            // valid because multibyte
+            source += width;
+        } else {
+            // invalid because no match
+            break;
+        }
+    }
+    // accept a single trailing !, ?, or = as marking a method name. The bounds
+    // check matters: when the loop above consumed the entire slice, source is
+    // equal to end and dereferencing it would read past the slice (and could
+    // misclassify a valid identifier if the stray byte were one of these)
+    if ((source < end) && (*source == '!' || *source == '?' || *source == '=')) {
+        source++;
+        result = PM_SLICE_TYPE_METHOD_NAME;
+    }
+    // valid if we are at the end of the slice
+    return source == end ? result : PM_SLICE_TYPE_NONE;
+}
+/**
+ * Report whether the slice is a valid local variable name. Returns
+ * PM_STRING_QUERY_ERROR when the slice could not be classified because of the
+ * encoding name, PM_STRING_QUERY_TRUE when the slice classifies as a local,
+ * and PM_STRING_QUERY_FALSE for every other classification.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+    if (type == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+    // constants, method names, and unclassified slices are all rejected
+    return (type == PM_SLICE_TYPE_LOCAL) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE;
+}
+/**
+ * Report whether the slice is a valid constant name. Returns
+ * PM_STRING_QUERY_ERROR when the slice could not be classified because of the
+ * encoding name, PM_STRING_QUERY_TRUE when the slice classifies as a constant,
+ * and PM_STRING_QUERY_FALSE for every other classification.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+    if (type == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+    // locals, method names, and unclassified slices are all rejected
+    return (type == PM_SLICE_TYPE_CONSTANT) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE;
+}
+/**
+ * Report whether the slice is a valid method name. Identifiers are accepted
+ * unless they are a numbered parameter (_1 through _9); slices that are not
+ * identifiers are accepted only when they exactly match one of the operator
+ * names listed in the body. Returns PM_STRING_QUERY_ERROR when the slice could
+ * not be classified because of the encoding name.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
+    // the operator spellings that are accepted as method names
+    static const char *const operator_names[] = {
+        "&", "`", "!", "^", ">", "<", "-", "%", "|", "+", "/", "*", "~",
+        "!=", "!~", "[]", "==", "=~", ">=", ">>", "<=", "<<", "**",
+        "===", "<=>", "[]="
+    };
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+    if (type == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+    // all constants and all suffixed method names are valid method names
+    if (type == PM_SLICE_TYPE_CONSTANT || type == PM_SLICE_TYPE_METHOD_NAME) {
+        return PM_STRING_QUERY_TRUE;
+    }
+    if (type == PM_SLICE_TYPE_LOCAL) {
+        // numbered parameters are not valid method names
+        const bool numbered_parameter = (length == 2) && (source[0] == '_') && (source[1] != '0') && pm_char_is_decimal_digit(source[1]);
+        return numbered_parameter ? PM_STRING_QUERY_FALSE : PM_STRING_QUERY_TRUE;
+    }
+    // the slice is not an identifier, so it must match an operator name in
+    // both length and content
+    for (size_t index = 0; index < sizeof(operator_names) / sizeof(operator_names[0]); index++) {
+        const char *candidate = operator_names[index];
+        if ((strlen(candidate) == length) && (memcmp(source, candidate, length) == 0)) {
+            return PM_STRING_QUERY_TRUE;
+        }
+    }
+    return PM_STRING_QUERY_FALSE;
+}