RubyGems - prism - Versions diffs - 0.19.0 → 0.20.0 - Mend

prism 0.19.0 → 0.20.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (58)

checksums.yaml +4 -4
data/CHANGELOG.md +29 -1
data/Makefile +5 -0
data/README.md +8 -6
data/config.yml +236 -38
data/docs/build_system.md +19 -2
data/docs/cruby_compilation.md +27 -0
data/docs/parser_translation.md +34 -0
data/docs/parsing_rules.md +19 -0
data/docs/releasing.md +3 -3
data/docs/ruby_api.md +1 -1
data/docs/serialization.md +17 -5
data/ext/prism/api_node.c +101 -81
data/ext/prism/extension.c +74 -11
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +1699 -504
data/include/prism/defines.h +8 -0
data/include/prism/diagnostic.h +39 -2
data/include/prism/encoding.h +10 -0
data/include/prism/options.h +40 -14
data/include/prism/parser.h +33 -17
data/include/prism/util/pm_buffer.h +9 -0
data/include/prism/util/pm_constant_pool.h +7 -0
data/include/prism/util/pm_newline_list.h +0 -11
data/include/prism/version.h +2 -2
data/include/prism.h +19 -2
data/lib/prism/debug.rb +11 -5
data/lib/prism/dot_visitor.rb +36 -14
data/lib/prism/dsl.rb +22 -22
data/lib/prism/ffi.rb +2 -2
data/lib/prism/node.rb +1020 -737
data/lib/prism/node_ext.rb +2 -2
data/lib/prism/parse_result.rb +17 -9
data/lib/prism/serialize.rb +53 -29
data/lib/prism/translation/parser/compiler.rb +1831 -0
data/lib/prism/translation/parser/lexer.rb +335 -0
data/lib/prism/translation/parser/rubocop.rb +37 -0
data/lib/prism/translation/parser.rb +163 -0
data/lib/prism/translation.rb +11 -0
data/lib/prism.rb +1 -0
data/prism.gemspec +12 -5
data/rbi/prism.rbi +150 -88
data/rbi/prism_static.rbi +15 -3
data/sig/prism.rbs +996 -961
data/sig/prism_static.rbs +123 -46
data/src/diagnostic.c +259 -219
data/src/encoding.c +4 -8
data/src/node.c +2 -6
data/src/options.c +24 -5
data/src/prettyprint.c +174 -42
data/src/prism.c +1136 -328
data/src/serialize.c +12 -9
data/src/token_type.c +353 -4
data/src/util/pm_buffer.c +11 -0
data/src/util/pm_constant_pool.c +12 -11
data/src/util/pm_newline_list.c +2 -14
metadata +10 -3
data/docs/building.md +0 -29

data/src/serialize.c CHANGED Viewed

@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
         }
         case PM_ASSOC_NODE: {
             pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
-            if (((pm_assoc_node_t *)node)->value == NULL) {
-                pm_buffer_append_byte(buffer, 0);
-            } else {
-                pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
-            }
+            pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
             if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
                 pm_buffer_append_byte(buffer, 0);
             } else {
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_BLOCK_LOCAL_VARIABLE_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
             break;
         }
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             for (uint32_t index = 0; index < locals_size; index++) {
                 pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
             }
-            pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
             if (((pm_block_node_t *)node)->parameters == NULL) {
                 pm_buffer_append_byte(buffer, 0);
             } else {
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_BLOCK_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
             if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
                 pm_buffer_append_byte(buffer, 0);
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             for (uint32_t index = 0; index < locals_size; index++) {
                 pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
             }
-            pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
             pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
             if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
                 pm_buffer_append_byte(buffer, 0);
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_KEYWORD_REST_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
             if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
                 pm_buffer_append_byte(buffer, 0);
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             for (uint32_t index = 0; index < locals_size; index++) {
                 pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
             }
-            pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
             pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
             pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
             pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
             pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
             pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
             break;
         }
         case PM_OPTIONAL_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
             pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
             pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
             pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
             break;
         }
         case PM_REQUIRED_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
             break;
         }
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             break;
         }
         case PM_REST_PARAMETER_NODE: {
+            pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
             pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
             if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
                 pm_buffer_append_byte(buffer, 0);
@@ -1904,6 +1905,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
     // serialize location
     pm_serialize_location(parser, &diagnostic->location, buffer);
+    pm_buffer_append_byte(buffer, diagnostic->level);
 }
 static void
@@ -1926,7 +1929,7 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
     pm_buffer_append_string(buffer, encoding->name, encoding_length);
 }
-#line 216 "serialize.c.erb"
+#line 218 "serialize.c.erb"
 /**
  * Serialize the encoding, metadata, nodes, and constant pool.
  */

data/src/token_type.c CHANGED Viewed

@@ -13,8 +13,7 @@
  * Returns a string representation of the given token type.
  */
 PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_to_str(pm_token_type_t token_type)
-{
+pm_token_type_name(pm_token_type_t token_type) {
     switch (token_type) {
         case PM_TOKEN_EOF:
             return "EOF";
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
         case PM_TOKEN___END__:
             return "__END__";
         case PM_TOKEN_MAXIMUM:
-            return "MAXIMUM";
+            assert(false && "unreachable");
+            return "";
     }
-    return "\0";
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
+}
+/**
+ * Returns the human name of the given token type.
+ */
+const char *
+pm_token_type_human(pm_token_type_t token_type) {
+    switch (token_type) {
+        case PM_TOKEN_EOF:
+            return "end of file";
+        case PM_TOKEN_MISSING:
+            return "missing token";
+        case PM_TOKEN_NOT_PROVIDED:
+            return "not provided token";
+        case PM_TOKEN_AMPERSAND:
+            return "'&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND:
+            return "'&&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+            return "'&&='";
+        case PM_TOKEN_AMPERSAND_DOT:
+            return "'&.'";
+        case PM_TOKEN_AMPERSAND_EQUAL:
+            return "'&='";
+        case PM_TOKEN_BACKTICK:
+            return "'`'";
+        case PM_TOKEN_BACK_REFERENCE:
+            return "back reference";
+        case PM_TOKEN_BANG:
+            return "'!'";
+        case PM_TOKEN_BANG_EQUAL:
+            return "'!='";
+        case PM_TOKEN_BANG_TILDE:
+            return "'!~'";
+        case PM_TOKEN_BRACE_LEFT:
+            return "'{'";
+        case PM_TOKEN_BRACE_RIGHT:
+            return "'}'";
+        case PM_TOKEN_BRACKET_LEFT:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_ARRAY:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT:
+            return "'[]'";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+            return "'[]='";
+        case PM_TOKEN_BRACKET_RIGHT:
+            return "']'";
+        case PM_TOKEN_CARET:
+            return "'^'";
+        case PM_TOKEN_CARET_EQUAL:
+            return "'^='";
+        case PM_TOKEN_CHARACTER_LITERAL:
+            return "character literal";
+        case PM_TOKEN_CLASS_VARIABLE:
+            return "class variable";
+        case PM_TOKEN_COLON:
+            return "':'";
+        case PM_TOKEN_COLON_COLON:
+            return "'::'";
+        case PM_TOKEN_COMMA:
+            return "','";
+        case PM_TOKEN_COMMENT:
+            return "comment";
+        case PM_TOKEN_CONSTANT:
+            return "constant";
+        case PM_TOKEN_DOT:
+            return "'.'";
+        case PM_TOKEN_DOT_DOT:
+            return "'..'";
+        case PM_TOKEN_DOT_DOT_DOT:
+            return "'...'";
+        case PM_TOKEN_EMBDOC_BEGIN:
+            return "'=begin'";
+        case PM_TOKEN_EMBDOC_END:
+            return "'=end'";
+        case PM_TOKEN_EMBDOC_LINE:
+            return "embedded documentation line";
+        case PM_TOKEN_EMBEXPR_BEGIN:
+            return "'#{'";
+        case PM_TOKEN_EMBEXPR_END:
+            return "'}'";
+        case PM_TOKEN_EMBVAR:
+            return "'#'";
+        case PM_TOKEN_EQUAL:
+            return "'='";
+        case PM_TOKEN_EQUAL_EQUAL:
+            return "'=='";
+        case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+            return "'==='";
+        case PM_TOKEN_EQUAL_GREATER:
+            return "'=>'";
+        case PM_TOKEN_EQUAL_TILDE:
+            return "'=~'";
+        case PM_TOKEN_FLOAT:
+            return "float";
+        case PM_TOKEN_FLOAT_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_FLOAT_RATIONAL:
+            return "rational";
+        case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            return "global variable";
+        case PM_TOKEN_GREATER:
+            return "'>'";
+        case PM_TOKEN_GREATER_EQUAL:
+            return "'>='";
+        case PM_TOKEN_GREATER_GREATER:
+            return "'>>'";
+        case PM_TOKEN_GREATER_GREATER_EQUAL:
+            return "'>>='";
+        case PM_TOKEN_HEREDOC_END:
+            return "heredoc ending";
+        case PM_TOKEN_HEREDOC_START:
+            return "heredoc beginning";
+        case PM_TOKEN_IDENTIFIER:
+            return "local variable or method identifier";
+        case PM_TOKEN_IGNORED_NEWLINE:
+            return "ignored newline";
+        case PM_TOKEN_INSTANCE_VARIABLE:
+            return "instance variable";
+        case PM_TOKEN_INTEGER:
+            return "integer";
+        case PM_TOKEN_INTEGER_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_INTEGER_RATIONAL:
+            return "rational";
+        case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_KEYWORD_ALIAS:
+            return "'alias'";
+        case PM_TOKEN_KEYWORD_AND:
+            return "'and'";
+        case PM_TOKEN_KEYWORD_BEGIN:
+            return "'begin'";
+        case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+            return "'BEGIN'";
+        case PM_TOKEN_KEYWORD_BREAK:
+            return "'break'";
+        case PM_TOKEN_KEYWORD_CASE:
+            return "'case'";
+        case PM_TOKEN_KEYWORD_CLASS:
+            return "'class'";
+        case PM_TOKEN_KEYWORD_DEF:
+            return "'def'";
+        case PM_TOKEN_KEYWORD_DEFINED:
+            return "'defined?'";
+        case PM_TOKEN_KEYWORD_DO:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_DO_LOOP:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_ELSE:
+            return "'else'";
+        case PM_TOKEN_KEYWORD_ELSIF:
+            return "'elsif'";
+        case PM_TOKEN_KEYWORD_END:
+            return "'end'";
+        case PM_TOKEN_KEYWORD_END_UPCASE:
+            return "'END'";
+        case PM_TOKEN_KEYWORD_ENSURE:
+            return "'ensure'";
+        case PM_TOKEN_KEYWORD_FALSE:
+            return "'false'";
+        case PM_TOKEN_KEYWORD_FOR:
+            return "'for'";
+        case PM_TOKEN_KEYWORD_IF:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IF_MODIFIER:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IN:
+            return "'in'";
+        case PM_TOKEN_KEYWORD_MODULE:
+            return "'module'";
+        case PM_TOKEN_KEYWORD_NEXT:
+            return "'next'";
+        case PM_TOKEN_KEYWORD_NIL:
+            return "'nil'";
+        case PM_TOKEN_KEYWORD_NOT:
+            return "'not'";
+        case PM_TOKEN_KEYWORD_OR:
+            return "'or'";
+        case PM_TOKEN_KEYWORD_REDO:
+            return "'redo'";
+        case PM_TOKEN_KEYWORD_RESCUE:
+            return "'rescue'";
+        case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+            return "'rescue'";
+        case PM_TOKEN_KEYWORD_RETRY:
+            return "'retry'";
+        case PM_TOKEN_KEYWORD_RETURN:
+            return "'return'";
+        case PM_TOKEN_KEYWORD_SELF:
+            return "'self'";
+        case PM_TOKEN_KEYWORD_SUPER:
+            return "'super'";
+        case PM_TOKEN_KEYWORD_THEN:
+            return "'then'";
+        case PM_TOKEN_KEYWORD_TRUE:
+            return "'true'";
+        case PM_TOKEN_KEYWORD_UNDEF:
+            return "'undef'";
+        case PM_TOKEN_KEYWORD_UNLESS:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNTIL:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_WHEN:
+            return "'when'";
+        case PM_TOKEN_KEYWORD_WHILE:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_YIELD:
+            return "'yield'";
+        case PM_TOKEN_KEYWORD___ENCODING__:
+            return "'__ENCODING__'";
+        case PM_TOKEN_KEYWORD___FILE__:
+            return "'__FILE__'";
+        case PM_TOKEN_KEYWORD___LINE__:
+            return "'__LINE__'";
+        case PM_TOKEN_LABEL:
+            return "label";
+        case PM_TOKEN_LABEL_END:
+            return "':'";
+        case PM_TOKEN_LAMBDA_BEGIN:
+            return "'{'";
+        case PM_TOKEN_LESS:
+            return "'<'";
+        case PM_TOKEN_LESS_EQUAL:
+            return "'<='";
+        case PM_TOKEN_LESS_EQUAL_GREATER:
+            return "'<=>'";
+        case PM_TOKEN_LESS_LESS:
+            return "'<<'";
+        case PM_TOKEN_LESS_LESS_EQUAL:
+            return "'<<='";
+        case PM_TOKEN_METHOD_NAME:
+            return "method name";
+        case PM_TOKEN_MINUS:
+            return "'-'";
+        case PM_TOKEN_MINUS_EQUAL:
+            return "'-='";
+        case PM_TOKEN_MINUS_GREATER:
+            return "'->'";
+        case PM_TOKEN_NEWLINE:
+            return "newline";
+        case PM_TOKEN_NUMBERED_REFERENCE:
+            return "numbered reference";
+        case PM_TOKEN_PARENTHESIS_LEFT:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_RIGHT:
+            return "')'";
+        case PM_TOKEN_PERCENT:
+            return "'%'";
+        case PM_TOKEN_PERCENT_EQUAL:
+            return "'%='";
+        case PM_TOKEN_PERCENT_LOWER_I:
+            return "'%i'";
+        case PM_TOKEN_PERCENT_LOWER_W:
+            return "'%w'";
+        case PM_TOKEN_PERCENT_LOWER_X:
+            return "'%x'";
+        case PM_TOKEN_PERCENT_UPPER_I:
+            return "'%I'";
+        case PM_TOKEN_PERCENT_UPPER_W:
+            return "'%W'";
+        case PM_TOKEN_PIPE:
+            return "'|'";
+        case PM_TOKEN_PIPE_EQUAL:
+            return "'|='";
+        case PM_TOKEN_PIPE_PIPE:
+            return "'||'";
+        case PM_TOKEN_PIPE_PIPE_EQUAL:
+            return "'||='";
+        case PM_TOKEN_PLUS:
+            return "'+'";
+        case PM_TOKEN_PLUS_EQUAL:
+            return "'+='";
+        case PM_TOKEN_QUESTION_MARK:
+            return "'?'";
+        case PM_TOKEN_REGEXP_BEGIN:
+            return "regular expression beginning";
+        case PM_TOKEN_REGEXP_END:
+            return "regular expression ending";
+        case PM_TOKEN_SEMICOLON:
+            return "';'";
+        case PM_TOKEN_SLASH:
+            return "'/'";
+        case PM_TOKEN_SLASH_EQUAL:
+            return "'/='";
+        case PM_TOKEN_STAR:
+            return "'*'";
+        case PM_TOKEN_STAR_EQUAL:
+            return "'*='";
+        case PM_TOKEN_STAR_STAR:
+            return "'**'";
+        case PM_TOKEN_STAR_STAR_EQUAL:
+            return "'**='";
+        case PM_TOKEN_STRING_BEGIN:
+            return "string beginning";
+        case PM_TOKEN_STRING_CONTENT:
+            return "string content";
+        case PM_TOKEN_STRING_END:
+            return "string ending";
+        case PM_TOKEN_SYMBOL_BEGIN:
+            return "symbol beginning";
+        case PM_TOKEN_TILDE:
+            return "'~'";
+        case PM_TOKEN_UAMPERSAND:
+            return "'&'";
+        case PM_TOKEN_UCOLON_COLON:
+            return "'::'";
+        case PM_TOKEN_UDOT_DOT:
+            return "'..'";
+        case PM_TOKEN_UDOT_DOT_DOT:
+            return "'...'";
+        case PM_TOKEN_UMINUS:
+            return "'-'";
+        case PM_TOKEN_UMINUS_NUM:
+            return "'-'";
+        case PM_TOKEN_UPLUS:
+            return "'+'";
+        case PM_TOKEN_USTAR:
+            return "'*'";
+        case PM_TOKEN_USTAR_STAR:
+            return "'**'";
+        case PM_TOKEN_WORDS_SEP:
+            return "string separator";
+        case PM_TOKEN___END__:
+            return "'__END__'";
+        case PM_TOKEN_MAXIMUM:
+            assert(false && "unreachable");
+            return "";
+    }
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
 }

data/src/util/pm_buffer.c CHANGED Viewed

@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
     pm_buffer_append_varuint(buffer, unsigned_int);
 }
+/**
+ * Prepend the given string to the buffer.
+ */
+void
+pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    size_t cursor = buffer->length;
+    pm_buffer_append_length(buffer, length);
+    memmove(buffer->value + length, buffer->value, cursor);
+    memcpy(buffer->value, value, length);
+}
 /**
  * Concatenate one buffer onto another.
  */

data/src/util/pm_constant_pool.c CHANGED Viewed

@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
         // If an id is set on this constant, then we know we have content here.
         // In this case we need to insert it into the next constant pool.
-        if (bucket->id != 0) {
+        if (bucket->id != PM_CONSTANT_ID_UNSET) {
             uint32_t next_index = bucket->hash & mask;
             // This implements linear scanning to find the next available slot
             // in case this index is already taken. We don't need to bother
             // comparing the values since we know that the hash is unique.
-            while (next_buckets[next_index].id != 0) {
+            while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
                 next_index = (next_index + 1) & mask;
             }
@@ -177,7 +177,7 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
  */
 pm_constant_t *
 pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
-    assert(constant_id > 0 && constant_id <= pool->size);
+    assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
     return &pool->constants[constant_id - 1];
 }
@@ -187,7 +187,7 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
 static inline pm_constant_id_t
 pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
     if (pool->size >= (pool->capacity / 4 * 3)) {
-        if (!pm_constant_pool_resize(pool)) return 0;
+        if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
     }
     assert(is_power_of_two(pool->capacity));
@@ -197,7 +197,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
     uint32_t index = hash & mask;
     pm_constant_pool_bucket_t *bucket;
-    while (bucket = &pool->buckets[index], bucket->id != 0) {
+    while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
         // If there is a collision, then we need to check if the content is the
         // same as the content we are trying to insert. If it is, then we can
         // return the id of the existing constant.
@@ -248,8 +248,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
 }
 /**
- * Insert a constant into a constant pool. Returns the id of the constant, or 0
- * if any potential calls to resize fail.
+ * Insert a constant into a constant pool. Returns the id of the constant, or
+ * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
  */
 pm_constant_id_t
 pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -258,8 +258,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
 /**
  * Insert a constant into a constant pool from memory that is now owned by the
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
- * resize fail.
+ * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
+ * potential calls to resize fail.
  */
 pm_constant_id_t
 pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -268,7 +268,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
 /**
  * Insert a constant into a constant pool from memory that is constant. Returns
- * the id of the constant, or 0 if any potential calls to resize fail.
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
+ * resize fail.
  */
 pm_constant_id_t
 pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -286,7 +287,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
         pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
         // If an id is set on this constant, then we know we have content here.
-        if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+        if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
             pm_constant_t *constant = &pool->constants[bucket->id - 1];
             free((void *) constant->start);
         }

data/src/util/pm_newline_list.c CHANGED Viewed

@@ -45,18 +45,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
     return true;
 }
-/**
- * Conditionally append a new offset to the newline list, if the value passed in
- * is a newline.
- */
-bool
-pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
-    if (*cursor != '\n') {
-        return true;
-    }
-    return pm_newline_list_append(list, cursor);
-}
 /**
  * Returns the line and column of the given offset. If the offset is not in the
  * list, the line and column of the closest offset less than the given offset
@@ -74,7 +62,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
         size_t mid = left + (right - left) / 2;
         if (list->offsets[mid] == offset) {
-            return ((pm_line_column_t) { mid, 0 });
+            return ((pm_line_column_t) { mid + 1, 0 });
         }
         if (list->offsets[mid] < offset) {
@@ -84,7 +72,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
         }
     }
-    return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
+    return ((pm_line_column_t) { left, offset - list->offsets[left - 1] });
 }
 /**

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prism
 version: !ruby/object:Gem::Version
-  version: 0.19.0
+  version: 0.20.0
 platform: ruby
 authors:
 - Shopify
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-12-14 00:00:00.000000000 Z
+date: 2024-02-01 00:00:00.000000000 Z
 dependencies: []
 description:
 email:
@@ -26,8 +26,8 @@ files:
 - README.md
 - config.yml
 - docs/build_system.md
-- docs/building.md
 - docs/configuration.md
+- docs/cruby_compilation.md
 - docs/design.md
 - docs/encoding.md
 - docs/fuzzing.md
@@ -35,6 +35,8 @@ files:
 - docs/javascript.md
 - docs/local_variable_depth.md
 - docs/mapping.md
+- docs/parser_translation.md
+- docs/parsing_rules.md
 - docs/releasing.md
 - docs/ripper.md
 - docs/ruby_api.md
@@ -88,6 +90,11 @@ files:
 - lib/prism/pattern.rb
 - lib/prism/ripper_compat.rb
 - lib/prism/serialize.rb
+- lib/prism/translation.rb
+- lib/prism/translation/parser.rb
+- lib/prism/translation/parser/compiler.rb
+- lib/prism/translation/parser/lexer.rb
+- lib/prism/translation/parser/rubocop.rb
 - lib/prism/visitor.rb
 - prism.gemspec
 - rbi/prism.rbi