RubyGems - prism - Versions diffs - 0.21.0 → 0.23.0 - Mend

prism 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

checksums.yaml +4 -4
data/CHANGELOG.md +40 -1
data/README.md +2 -1
data/docs/releasing.md +84 -16
data/docs/ruby_parser_translation.md +19 -0
data/docs/serialization.md +2 -0
data/ext/prism/api_node.c +784 -785
data/ext/prism/extension.c +56 -19
data/ext/prism/extension.h +2 -2
data/include/prism/diagnostic.h +11 -6
data/include/prism/encoding.h +7 -0
data/include/prism/util/pm_constant_pool.h +1 -1
data/include/prism/util/pm_strpbrk.h +4 -1
data/include/prism/version.h +2 -2
data/lib/prism/ffi.rb +8 -3
data/lib/prism/lex_compat.rb +17 -1
data/lib/prism/node.rb +212 -32
data/lib/prism/node_ext.rb +25 -2
data/lib/prism/parse_result.rb +46 -16
data/lib/prism/serialize.rb +14 -6
data/lib/prism/translation/parser/compiler.rb +16 -6
data/lib/prism/translation/parser.rb +19 -12
data/lib/prism/translation/ripper.rb +577 -0
data/lib/prism/translation/ruby_parser.rb +1521 -0
data/lib/prism/translation.rb +3 -3
data/lib/prism.rb +0 -1
data/prism.gemspec +5 -3
data/src/diagnostic.c +20 -15
data/src/encoding.c +16 -17
data/src/options.c +7 -2
data/src/prism.c +145 -90
data/src/serialize.c +24 -13
data/src/token_type.c +3 -3
data/src/util/pm_constant_pool.c +1 -1
data/src/util/pm_string.c +0 -7
data/src/util/pm_strpbrk.c +122 -14
metadata +6 -4
data/lib/prism/ripper_compat.rb +0 -207

data/lib/prism/translation.rb CHANGED Viewed

@@ -2,10 +2,10 @@
 module Prism
   # This module is responsible for converting the prism syntax tree into other
-  # syntax trees. At the moment it only supports converting to the
-  # whitequark/parser gem's syntax tree, but support is planned for the
-  # seattlerb/ruby_parser gem's syntax tree as well.
+  # syntax trees.
   module Translation
     autoload :Parser, "prism/translation/parser"
+    autoload :Ripper, "prism/translation/ripper"
+    autoload :RubyParser, "prism/translation/ruby_parser"
   end
 end

data/lib/prism.rb CHANGED Viewed

@@ -22,7 +22,6 @@ module Prism
   autoload :LexRipper, "prism/lex_compat"
   autoload :MutationCompiler, "prism/mutation_compiler"
   autoload :NodeInspector, "prism/node_inspector"
-  autoload :RipperCompat, "prism/ripper_compat"
   autoload :Pack, "prism/pack"
   autoload :Pattern, "prism/pattern"
   autoload :Serialize, "prism/serialize"

data/prism.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |spec|
   spec.name = "prism"
-  spec.version = "0.21.0"
+  spec.version = "0.23.0"
   spec.authors = ["Shopify"]
   spec.email = ["ruby@shopify.com"]
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
   spec.homepage = "https://github.com/ruby/prism"
   spec.license = "MIT"
-  spec.required_ruby_version = ">= 3.0.0"
+  spec.required_ruby_version = ">= 2.7.0"
   spec.require_paths = ["lib"]
   spec.files = [
@@ -36,6 +36,7 @@ Gem::Specification.new do |spec|
     "docs/releasing.md",
     "docs/ripper.md",
     "docs/ruby_api.md",
+    "docs/ruby_parser_translation.md",
     "docs/serialization.md",
     "docs/testing.md",
     "ext/prism/api_node.c",
@@ -83,13 +84,14 @@ Gem::Specification.new do |spec|
     "lib/prism/parse_result/comments.rb",
     "lib/prism/parse_result/newlines.rb",
     "lib/prism/pattern.rb",
-    "lib/prism/ripper_compat.rb",
     "lib/prism/serialize.rb",
     "lib/prism/translation.rb",
     "lib/prism/translation/parser.rb",
     "lib/prism/translation/parser/compiler.rb",
     "lib/prism/translation/parser/lexer.rb",
     "lib/prism/translation/parser/rubocop.rb",
+    "lib/prism/translation/ripper.rb",
+    "lib/prism/translation/ruby_parser.rb",
     "lib/prism/visitor.rb",
     "src/diagnostic.c",
     "src/encoding.c",

data/src/diagnostic.c CHANGED Viewed

@@ -63,7 +63,8 @@ typedef struct {
  *
  * For errors, they are:
  *
- * * `PM_ERROR_LEVEL_FATAL` - The level for all errors.
+ * * `PM_ERROR_LEVEL_FATAL` - The default level for errors.
+ * * `PM_ERROR_LEVEL_ARGUMENT` - Errors that should raise ArgumentError.
  *
  * For warnings, they are:
  *
@@ -71,9 +72,13 @@ typedef struct {
  * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
  */
 static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
+    // Special error that can be replaced
     [PM_ERR_CANNOT_PARSE_EXPRESSION]            = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
-    // Errors
+    // Errors that should raise argument errors
+    [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT]     = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
+    // Errors that should raise syntax errors
     [PM_ERR_ALIAS_ARGUMENT]                     = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_AMPAMPEQ_MULTI_ASSIGN]              = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_ARGUMENT_AFTER_BLOCK]               = { "unexpected argument after a block argument", PM_ERROR_LEVEL_FATAL },
@@ -154,7 +159,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_ESCAPE_INVALID_UNICODE_LONG]        = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_ESCAPE_INVALID_UNICODE_TERM]        = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_EXPECT_ARGUMENT]                    = { "expected an argument", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_EXPECT_EOL_AFTER_STATEMENT]         = { "expected a newline or semicolon after the statement", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_EXPECT_EOL_AFTER_STATEMENT]         = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ]   = { "expected an expression after `&&=`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA]      = { "expected an expression after `,`", PM_ERROR_LEVEL_FATAL },
@@ -179,24 +184,25 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_FOR_IN]                             = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_FOR_TERM]                           = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_HASH_EXPRESSION_AFTER_LABEL]        = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_HASH_KEY]                           = { "expected a key in the hash literal", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_HASH_KEY]                           = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_HASH_ROCKET]                        = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_HASH_TERM]                          = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_HASH_VALUE]                         = { "expected a value in the hash literal", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_HEREDOC_TERM]                       = { "could not find a terminator for the heredoc", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INCOMPLETE_QUESTION_MARK]           = { "incomplete expression at `?`", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_INCOMPLETE_VARIABLE_CLASS]          = { "incomplete class variable", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE]       = { "incomplete instance variable", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT]     = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INCOMPLETE_VARIABLE_CLASS]          = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE]       = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_FLOAT_EXPONENT]             = { "invalid exponent", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_NUMBER_BINARY]              = { "invalid binary number", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_NUMBER_DECIMAL]             = { "invalid decimal number", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_NUMBER_HEXADECIMAL]         = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_NUMBER_OCTAL]               = { "invalid octal number", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_NUMBER_UNDERSCORE]          = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INVALID_CHARACTER]                  = { "invalid character 0x%X", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INVALID_MULTIBYTE_CHARACTER]        = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INVALID_PRINTABLE_CHARACTER]        = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_INVALID_PERCENT]                    = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
-    [PM_ERR_INVALID_TOKEN]                      = { "invalid token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
-    [PM_ERR_INVALID_VARIABLE_GLOBAL]            = { "invalid global variable", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_INVALID_VARIABLE_GLOBAL]            = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_IT_NOT_ALLOWED]                     = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_LAMBDA_OPEN]                        = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_LAMBDA_TERM_BRACE]                  = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL },
@@ -215,6 +221,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_MODULE_NAME]                        = { "expected a constant name after `module`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_MODULE_TERM]                        = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS]          = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST]       = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_NOT_EXPRESSION]                     = { "expected an expression after `not`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_NO_LOCAL_VARIABLE]                  = { "%.*s: no such local variable", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_NUMBER_LITERAL_UNDERSCORE]          = { "number literal ending with a `_`", PM_ERROR_LEVEL_FATAL },
@@ -268,7 +275,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_STATEMENT_UNDEF]                    = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_STRING_CONCATENATION]               = { "expected a string for concatenation", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_STRING_INTERPOLATED_TERM]           = { "expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_STRING_LITERAL_TERM]                = { "expected a closing delimiter for the string literal", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_STRING_LITERAL_EOF]                 = { "unterminated string meets end of file", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_STRING_LITERAL_TERM]                = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_SYMBOL_INVALID]                     = { "invalid symbol", PM_ERROR_LEVEL_FATAL }, // TODO expected symbol? prism.c ~9719
     [PM_ERR_SYMBOL_TERM_DYNAMIC]                = { "expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_SYMBOL_TERM_INTERPOLATED]           = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_FATAL },
@@ -276,17 +284,14 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_TERNARY_EXPRESSION_FALSE]           = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_TERNARY_EXPRESSION_TRUE]            = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNDEF_ARGUMENT]                     = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_UNARY_RECEIVER_BANG]                = { "expected a receiver for unary `!`", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_UNARY_RECEIVER_MINUS]               = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_UNARY_RECEIVER_PLUS]                = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_UNARY_RECEIVER]                     = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT]     = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNEXPECTED_TOKEN_IGNORE]            = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_UNARY_RECEIVER_TILDE]               = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNTIL_TERM]                         = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_VOID_EXPRESSION]                    = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_WHILE_TERM]                         = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_WRITE_TARGET_IN_METHOD]             = { "dynamic constant assignment", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_WRITE_TARGET_READONLY]              = { "immutable variable as a write target", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_WRITE_TARGET_READONLY]              = { "Can't set variable %.*s", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_WRITE_TARGET_UNEXPECTED]            = { "unexpected write target", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_XSTRING_TERM]                       = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_FATAL },

data/src/encoding.c CHANGED Viewed

@@ -2253,12 +2253,12 @@ static const uint8_t pm_utf_8_dfa[] = {
 static pm_unicode_codepoint_t
 pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
     assert(n >= 0);
-    size_t maximum = (size_t) n;
+    size_t maximum = (n > 4) ? 4 : ((size_t) n);
     uint32_t codepoint;
     uint32_t state = 0;
-    for (size_t index = 0; index < 4 && index < maximum; index++) {
+    for (size_t index = 0; index < maximum; index++) {
         uint32_t byte = b[index];
         uint32_t type = pm_utf_8_dfa[byte];
@@ -2267,7 +2267,7 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
             (0xffu >> type) & (byte);
         state = pm_utf_8_dfa[256 + (state * 16) + type];
-        if (!state) {
+        if (state == 0) {
             *width = index + 1;
             return (pm_unicode_codepoint_t) codepoint;
         }
@@ -2282,9 +2282,17 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
  */
 size_t
 pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
-    size_t width;
-    pm_utf_8_codepoint(b, n, &width);
-    return width;
+    assert(n >= 0);
+    size_t maximum = (n > 4) ? 4 : ((size_t) n);
+    uint32_t state = 0;
+    for (size_t index = 0; index < maximum; index++) {
+        state = pm_utf_8_dfa[256 + (state * 16) + pm_utf_8_dfa[b[index]]];
+        if (state == 0) return index + 1;
+    }
+    return 0;
 }
 /**
@@ -4186,15 +4194,6 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
     return 0;
 }
-/**
- * Returns the size of the next character in the KOI-8 encoding. This means
- * checking if it's a valid codepoint in KOI-8 and if it is returning 1.
- */
-static size_t
-pm_encoding_koi8_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
-}
 /**
  * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
  * character cannot be decoded from the given bytes.
@@ -4652,7 +4651,7 @@ const pm_encoding_t pm_encodings[] = {
     },
     [PM_ENCODING_KOI8_R] = {
         .name = "KOI8-R",
-        .char_width = pm_encoding_koi8_char_width,
+        .char_width = pm_encoding_single_char_width,
         .alnum_char = pm_encoding_koi8_r_alnum_char,
         .alpha_char = pm_encoding_koi8_r_alpha_char,
         .isupper_char = pm_encoding_koi8_r_isupper_char,
@@ -4660,7 +4659,7 @@ const pm_encoding_t pm_encodings[] = {
     },
     [PM_ENCODING_KOI8_U] = {
         .name = "KOI8-U",
-        .char_width = pm_encoding_koi8_char_width,
+        .char_width = pm_encoding_single_char_width,
         .alnum_char = pm_encoding_koi8_u_alnum_char,
         .alpha_char = pm_encoding_koi8_u_alpha_char,
         .isupper_char = pm_encoding_koi8_u_isupper_char,

data/src/options.c CHANGED Viewed

@@ -45,17 +45,22 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
     }
     if (length == 5) {
-        if (strncmp(version, "3.3.0", 5) == 0) {
+        if (strncmp(version, "3.3.0", length) == 0) {
             options->version = PM_OPTIONS_VERSION_CRUBY_3_3_0;
             return true;
         }
-        if (strncmp(version, "latest", 6) == 0) {
+        if (strncmp(version, "3.4.0", length) == 0) {
             options->version = PM_OPTIONS_VERSION_LATEST;
             return true;
         }
     }
+    if (length == 6 && strncmp(version, "latest", length) == 0) {
+        options->version = PM_OPTIONS_VERSION_LATEST;
+        return true;
+    }
     return false;
 }