RubyGems - prism - Versions diffs - 0.29.0 → 0.30.0 - Mend

prism 0.29.0 → 0.30.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (67) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +22 -1
data/CONTRIBUTING.md +0 -4
data/README.md +1 -0
data/config.yml +66 -9
data/docs/fuzzing.md +1 -1
data/docs/ripper_translation.md +22 -0
data/ext/prism/api_node.c +30 -12
data/ext/prism/extension.c +107 -372
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +138 -70
data/include/prism/diagnostic.h +7 -2
data/include/prism/node.h +0 -21
data/include/prism/parser.h +23 -25
data/include/prism/regexp.h +17 -8
data/include/prism/static_literals.h +3 -2
data/include/prism/util/pm_char.h +1 -2
data/include/prism/util/pm_constant_pool.h +0 -8
data/include/prism/util/pm_integer.h +16 -9
data/include/prism/util/pm_string.h +0 -8
data/include/prism/version.h +2 -2
data/include/prism.h +0 -11
data/lib/prism/compiler.rb +3 -0
data/lib/prism/dispatcher.rb +14 -0
data/lib/prism/dot_visitor.rb +22 -3
data/lib/prism/dsl.rb +7 -2
data/lib/prism/ffi.rb +24 -3
data/lib/prism/inspect_visitor.rb +10 -8
data/lib/prism/mutation_compiler.rb +6 -1
data/lib/prism/node.rb +166 -241
data/lib/prism/node_ext.rb +21 -5
data/lib/prism/parse_result/comments.rb +0 -7
data/lib/prism/parse_result/newlines.rb +101 -11
data/lib/prism/parse_result.rb +17 -0
data/lib/prism/reflection.rb +3 -1
data/lib/prism/serialize.rb +80 -67
data/lib/prism/translation/parser/compiler.rb +134 -114
data/lib/prism/translation/parser.rb +6 -1
data/lib/prism/translation/ripper.rb +8 -6
data/lib/prism/translation/ruby_parser.rb +23 -5
data/lib/prism/visitor.rb +3 -0
data/lib/prism.rb +0 -4
data/prism.gemspec +1 -4
data/rbi/prism/node.rbi +63 -6
data/rbi/prism/visitor.rbi +3 -0
data/rbi/prism.rbi +6 -0
data/sig/prism/dsl.rbs +4 -1
data/sig/prism/mutation_compiler.rbs +1 -0
data/sig/prism/node.rbs +28 -4
data/sig/prism/visitor.rbs +1 -0
data/sig/prism.rbs +21 -0
data/src/diagnostic.c +27 -17
data/src/node.c +408 -1666
data/src/prettyprint.c +49 -6
data/src/prism.c +958 -991
data/src/regexp.c +133 -68
data/src/serialize.c +6 -1
data/src/static_literals.c +63 -84
data/src/token_type.c +2 -2
data/src/util/pm_constant_pool.c +0 -8
data/src/util/pm_integer.c +39 -11
data/src/util/pm_string.c +0 -12
data/src/util/pm_strpbrk.c +32 -6
metadata +2 -5
data/include/prism/util/pm_string_list.h +0 -44
data/lib/prism/debug.rb +0 -249
data/src/util/pm_string_list.c +0 -28

data/src/token_type.c CHANGED Viewed

@@ -362,7 +362,7 @@ const char *
 pm_token_type_human(pm_token_type_t token_type) {
     switch (token_type) {
         case PM_TOKEN_EOF:
-            return "end of file";
+            return "end-of-input";
         case PM_TOKEN_MISSING:
             return "missing token";
         case PM_TOKEN_NOT_PROVIDED:
@@ -684,7 +684,7 @@ pm_token_type_human(pm_token_type_t token_type) {
         case PM_TOKEN_USTAR:
             return "*";
         case PM_TOKEN_USTAR_STAR:
-            return "'**'";
+            return "**";
         case PM_TOKEN_WORDS_SEP:
             return "string separator";
         case PM_TOKEN___END__:

data/src/util/pm_constant_pool.c CHANGED Viewed

@@ -61,14 +61,6 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
     return false;
 }
-/**
- * Get the memory size of a list of constant ids.
- */
-size_t
-pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
-    return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
-}
 /**
  * Free the memory associated with a list of constant ids.
  */

data/src/util/pm_integer.c CHANGED Viewed

@@ -48,7 +48,7 @@ big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint
 /**
  * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
- * base. Assume a, b, c, a - b - c all to be poitive.
+ * base. Assume a, b, c, a - b - c all to be positive.
  * Return pm_integer_t with values allocated. Not normalized.
  */
 static void
@@ -471,15 +471,18 @@ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *
  * has already been validated, as internal validation checks are not performed
  * here.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
-    // Ignore unary +. Unary + is parsed differently and will not end up here.
+    // Ignore unary +. Unary - is parsed differently and will not end up here.
     // Instead, it will modify the parsed integer later.
     if (*start == '+') start++;
     // Determine the multiplier from the base, and skip past any prefixes.
     uint32_t multiplier = 10;
     switch (base) {
+        case PM_INTEGER_BASE_DEFAULT:
+            while (*start == '0') start++; // 01 -> 1
+            break;
         case PM_INTEGER_BASE_BINARY:
             start += 2; // 0b
             multiplier = 2;
@@ -533,14 +536,6 @@ pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *s
     integer->value = (uint32_t) value;
 }
-/**
- * Return the memory size of the integer.
- */
-size_t
-pm_integer_memsize(const pm_integer_t *integer) {
-    return sizeof(pm_integer_t) + integer->length * sizeof(uint32_t);
-}
 /**
  * Compare two integers. This function returns -1 if the left integer is less
  * than the right integer, 0 if they are equal, and 1 if the left integer is
@@ -572,6 +567,39 @@ pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
     return 0;
 }
+/**
+ * Reduce a ratio of integers to its simplest form.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
+    // If either the numerator or denominator do not fit into a 32-bit integer,
+    // then this function is a no-op. In the future, we may consider reducing
+    // even the larger numbers, but for now we're going to keep it simple.
+    if (
+        // If the numerator doesn't fit into a 32-bit integer, return early.
+        numerator->length != 0 ||
+        // If the denominator doesn't fit into a 32-bit integer, return early.
+        denominator->length != 0 ||
+        // If the numerator is 0, then return early.
+        numerator->value == 0 ||
+        // If the denominator is 1, then return early.
+        denominator->value == 1
+    ) return;
+    // Find the greatest common divisor of the numerator and denominator.
+    uint32_t divisor = numerator->value;
+    uint32_t remainder = denominator->value;
+    while (remainder != 0) {
+        uint32_t temporary = remainder;
+        remainder = divisor % remainder;
+        divisor = temporary;
+    }
+    // Divide the numerator and denominator by the greatest common divisor.
+    numerator->value /= divisor;
+    denominator->value /= divisor;
+}
 /**
  * Convert an integer to a decimal string.
  */

data/src/util/pm_string.c CHANGED Viewed

@@ -245,18 +245,6 @@ pm_string_file_init(pm_string_t *string, const char *filepath) {
 #endif
 }
-/**
- * Returns the memory size associated with the string.
- */
-size_t
-pm_string_memsize(const pm_string_t *string) {
-    size_t size = sizeof(pm_string_t);
-    if (string->type == PM_STRING_OWNED) {
-        size += string->length;
-    }
-    return size;
-}
 /**
  * Ensure the string is owned. If it is not, then reinitialize it as owned and
  * copy over the previous source.

data/src/util/pm_strpbrk.c CHANGED Viewed

@@ -8,6 +8,27 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start
     pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
 }
+/**
+ * Set the explicit encoding for the parser to the current encoding.
+ */
+static inline void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
+    if (parser->explicit_encoding != NULL) {
+        if (parser->explicit_encoding == parser->encoding) {
+            // Okay, we already locked to this encoding.
+        } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // Not okay, we already found a Unicode escape sequence and this
+            // conflicts.
+            pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        } else {
+            // Should not be anything else.
+            assert(false && "unreachable");
+        }
+    }
+    parser->explicit_encoding = parser->encoding;
+}
 /**
  * This is the default path.
  */
@@ -52,7 +73,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
  * This is the path when the encoding is ASCII-8BIT.
  */
 static inline const uint8_t *
-pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
     size_t index = 0;
     while (index < maximum) {
@@ -60,6 +81,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
             return source + index;
         }
+        if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
         index++;
     }
@@ -72,6 +94,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
 static inline const uint8_t *
 pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
     size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
     while (index < maximum) {
         if (strchr((const char *) charset, source[index]) != NULL) {
@@ -81,7 +104,8 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
         if (source[index] < 0x80) {
             index++;
         } else {
-            size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
             if (width > 0) {
                 index += width;
@@ -96,7 +120,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
                 do {
                     index++;
-                } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
                 pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
             }
@@ -113,6 +137,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
 static inline const uint8_t *
 pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
     size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
     while (index < maximum) {
         if (strchr((const char *) charset, source[index]) != NULL) {
@@ -122,7 +147,8 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
         if (source[index] < 0x80 || !validate) {
             index++;
         } else {
-            size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            pm_strpbrk_explicit_encoding_set(parser, source, width);
             if (width > 0) {
                 index += width;
@@ -135,7 +161,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
                 do {
                     index++;
-                } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
                 pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
             }
@@ -171,7 +197,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
     } else if (!parser->encoding_changed) {
         return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
     } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
-        return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
+        return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
     } else if (parser->encoding->multibyte) {
         return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
     } else {

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prism
 version: !ruby/object:Gem::Version
-  version: 0.29.0
+  version: 0.30.0
 platform: ruby
 authors:
 - Shopify
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-05-10 00:00:00.000000000 Z
+date: 2024-06-07 00:00:00.000000000 Z
 dependencies: []
 description:
 email:
@@ -69,13 +69,11 @@ files:
 - include/prism/util/pm_memchr.h
 - include/prism/util/pm_newline_list.h
 - include/prism/util/pm_string.h
-- include/prism/util/pm_string_list.h
 - include/prism/util/pm_strncasecmp.h
 - include/prism/util/pm_strpbrk.h
 - include/prism/version.h
 - lib/prism.rb
 - lib/prism/compiler.rb
-- lib/prism/debug.rb
 - lib/prism/desugar_compiler.rb
 - lib/prism/dispatcher.rb
 - lib/prism/dot_visitor.rb
@@ -155,7 +153,6 @@ files:
 - src/util/pm_memchr.c
 - src/util/pm_newline_list.c
 - src/util/pm_string.c
-- src/util/pm_string_list.c
 - src/util/pm_strncasecmp.c
 - src/util/pm_strpbrk.c
 homepage: https://github.com/ruby/prism

data/include/prism/util/pm_string_list.h DELETED Viewed

@@ -1,44 +0,0 @@
-/**
- * @file pm_string_list.h
- *
- * A list of strings.
- */
-#ifndef PRISM_STRING_LIST_H
-#define PRISM_STRING_LIST_H
-#include "prism/defines.h"
-#include "prism/util/pm_string.h"
-#include <stddef.h>
-#include <stdlib.h>
-/**
- * A list of strings.
- */
-typedef struct {
-    /** The length of the string list. */
-    size_t length;
-    /** The capacity of the string list that has been allocated. */
-    size_t capacity;
-    /** A pointer to the start of the string list. */
-    pm_string_t *strings;
-} pm_string_list_t;
-/**
- * Append a pm_string_t to the given string list.
- *
- * @param string_list The string list to append to.
- * @param string The string to append.
- */
-void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
-/**
- * Free the memory associated with the string list.
- *
- * @param string_list The string list to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
-#endif

data/lib/prism/debug.rb DELETED Viewed

@@ -1,249 +0,0 @@
-# frozen_string_literal: true
-module Prism
-  # This module is used for testing and debugging and is not meant to be used by
-  # consumers of this library.
-  module Debug
-    # A wrapper around a RubyVM::InstructionSequence that provides a more
-    # convenient interface for accessing parts of the iseq.
-    class ISeq # :nodoc:
-      attr_reader :parts
-      def initialize(parts)
-        @parts = parts
-      end
-      def type
-        parts[0]
-      end
-      def local_table
-        parts[10]
-      end
-      def instructions
-        parts[13]
-      end
-      def each_child
-        instructions.each do |instruction|
-          # Only look at arrays. Other instructions are line numbers or
-          # tracepoint events.
-          next unless instruction.is_a?(Array)
-          instruction.each do |opnd|
-            # Only look at arrays. Other operands are literals.
-            next unless opnd.is_a?(Array)
-            # Only look at instruction sequences. Other operands are literals.
-            next unless opnd[0] == "YARVInstructionSequence/SimpleDataFormat"
-            yield ISeq.new(opnd)
-          end
-        end
-      end
-    end
-    private_constant :ISeq
-    # :call-seq:
-    #   Debug::cruby_locals(source) -> Array
-    #
-    # For the given source, compiles with CRuby and returns a list of all of the
-    # sets of local variables that were encountered.
-    def self.cruby_locals(source)
-      verbose, $VERBOSE = $VERBOSE, nil
-      begin
-        locals = [] #: Array[Array[Symbol | Integer]]
-        stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)]
-        while (iseq = stack.pop)
-          names = [*iseq.local_table]
-          names.map!.with_index do |name, index|
-            # When an anonymous local variable is present in the iseq's local
-            # table, it is represented as the stack offset from the top.
-            # However, when these are dumped to binary and read back in, they
-            # are replaced with the symbol :#arg_rest. To consistently handle
-            # this, we replace them here with their index.
-            if name == :"#arg_rest"
-              names.length - index + 1
-            else
-              name
-            end
-          end
-          locals << names
-          iseq.each_child { |child| stack << child }
-        end
-        locals
-      ensure
-        $VERBOSE = verbose
-      end
-    end
-    # Used to hold the place of a local that will be in the local table but
-    # cannot be accessed directly from the source code. For example, the
-    # iteration variable in a for loop or the positional parameter on a method
-    # definition that is destructured.
-    AnonymousLocal = Object.new
-    private_constant :AnonymousLocal
-    # :call-seq:
-    #   Debug::prism_locals(source) -> Array
-    #
-    # For the given source, parses with prism and returns a list of all of the
-    # sets of local variables that were encountered.
-    def self.prism_locals(source)
-      locals = [] #: Array[Array[Symbol | Integer]]
-      stack = [Prism.parse(source).value] #: Array[Prism::node]
-      while (node = stack.pop)
-        case node
-        when BlockNode, DefNode, LambdaNode
-          names = node.locals
-          params =
-            if node.is_a?(DefNode)
-              node.parameters
-            elsif node.parameters.is_a?(NumberedParametersNode)
-              nil
-            else
-              node.parameters&.parameters
-            end
-          # prism places parameters in the same order that they appear in the
-          # source. CRuby places them in the order that they need to appear
-          # according to their own internal calling convention. We mimic that
-          # order here so that we can compare properly.
-          if params
-            sorted = [
-              *params.requireds.map do |required|
-                if required.is_a?(RequiredParameterNode)
-                  required.name
-                else
-                  AnonymousLocal
-                end
-              end,
-              *params.optionals.map(&:name),
-              *((params.rest.name || :*) if params.rest && !params.rest.is_a?(ImplicitRestNode)),
-              *params.posts.map do |post|
-                if post.is_a?(RequiredParameterNode)
-                  post.name
-                else
-                  AnonymousLocal
-                end
-              end,
-              *params.keywords.grep(RequiredKeywordParameterNode).map(&:name),
-              *params.keywords.grep(OptionalKeywordParameterNode).map(&:name),
-            ]
-            sorted << AnonymousLocal if params.keywords.any?
-            if params.keyword_rest.is_a?(ForwardingParameterNode)
-              sorted.push(:*, :**, :&, :"...")
-            elsif params.keyword_rest.is_a?(KeywordRestParameterNode)
-              sorted << (params.keyword_rest.name || :**)
-            end
-            # Recurse down the parameter tree to find any destructured
-            # parameters and add them after the other parameters.
-            param_stack = params.requireds.concat(params.posts).grep(MultiTargetNode).reverse
-            while (param = param_stack.pop)
-              case param
-              when MultiTargetNode
-                param_stack.concat(param.rights.reverse)
-                param_stack << param.rest if param.rest&.expression && !sorted.include?(param.rest.expression.name)
-                param_stack.concat(param.lefts.reverse)
-              when RequiredParameterNode
-                sorted << param.name
-              when SplatNode
-                sorted << param.expression.name
-              end
-            end
-            if params.block
-              sorted << (params.block.name || :&)
-            end
-            names = sorted.concat(names - sorted)
-          end
-          names.map!.with_index do |name, index|
-            if name == AnonymousLocal
-              names.length - index + 1
-            else
-              name
-            end
-          end
-          locals << names
-        when ClassNode, ModuleNode, ProgramNode, SingletonClassNode
-          locals << node.locals
-        when ForNode
-          locals << [2]
-        when PostExecutionNode
-          locals.push([], [])
-        when InterpolatedRegularExpressionNode
-          locals << [] if node.once?
-        end
-        stack.concat(node.compact_child_nodes)
-      end
-      locals
-    end
-    # :call-seq:
-    #   Debug::newlines(source) -> Array
-    #
-    # For the given source string, return the byte offsets of every newline in
-    # the source.
-    def self.newlines(source)
-      Prism.parse(source).source.offsets
-    end
-    # A wrapping around prism's internal encoding data structures. This is used
-    # for reflection and debugging purposes.
-    class Encoding
-      # The name of the encoding, that can be passed to Encoding.find.
-      attr_reader :name
-      # Initialize a new encoding with the given name and whether or not it is
-      # a multibyte encoding.
-      def initialize(name, multibyte)
-        @name = name
-        @multibyte = multibyte
-      end
-      # Whether or not the encoding is a multibyte encoding.
-      def multibyte?
-        @multibyte
-      end
-      # Returns the number of bytes of the first character in the source string,
-      # if it is valid for the encoding. Otherwise, returns 0.
-      def width(source)
-        Encoding._width(name, source)
-      end
-      # Returns true if the first character in the source string is a valid
-      # alphanumeric character for the encoding.
-      def alnum?(source)
-        Encoding._alnum?(name, source)
-      end
-      # Returns true if the first character in the source string is a valid
-      # alphabetic character for the encoding.
-      def alpha?(source)
-        Encoding._alpha?(name, source)
-      end
-      # Returns true if the first character in the source string is a valid
-      # uppercase character for the encoding.
-      def upper?(source)
-        Encoding._upper?(name, source)
-      end
-    end
-  end
-end

data/src/util/pm_string_list.c DELETED Viewed

@@ -1,28 +0,0 @@
-#include "prism/util/pm_string_list.h"
-/**
- * Append a pm_string_t to the given string list.
- */
-void
-pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
-    if (string_list->length + 1 > string_list->capacity) {
-        if (string_list->capacity == 0) {
-            string_list->capacity = 1;
-        } else {
-            string_list->capacity *= 2;
-        }
-        string_list->strings = xrealloc(string_list->strings, string_list->capacity * sizeof(pm_string_t));
-        if (string_list->strings == NULL) abort();
-    }
-    string_list->strings[string_list->length++] = *string;
-}
-/**
- * Free the memory associated with the string list
- */
-void
-pm_string_list_free(pm_string_list_t *string_list) {
-    xfree(string_list->strings);
-}