RubyGems - prism - Versions diffs - 0.19.0 → 0.24.0 - Mend

prism 0.19.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)

checksums.yaml +4 -4
data/CHANGELOG.md +102 -1
data/Makefile +5 -0
data/README.md +9 -6
data/config.yml +236 -38
data/docs/build_system.md +19 -2
data/docs/cruby_compilation.md +27 -0
data/docs/parser_translation.md +34 -0
data/docs/parsing_rules.md +19 -0
data/docs/releasing.md +84 -16
data/docs/ruby_api.md +1 -1
data/docs/ruby_parser_translation.md +19 -0
data/docs/serialization.md +19 -5
data/ext/prism/api_node.c +1989 -1525
data/ext/prism/extension.c +130 -30
data/ext/prism/extension.h +2 -2
data/include/prism/ast.h +1700 -505
data/include/prism/defines.h +8 -0
data/include/prism/diagnostic.h +49 -7
data/include/prism/encoding.h +17 -0
data/include/prism/options.h +40 -14
data/include/prism/parser.h +34 -18
data/include/prism/util/pm_buffer.h +9 -0
data/include/prism/util/pm_constant_pool.h +18 -0
data/include/prism/util/pm_newline_list.h +4 -14
data/include/prism/util/pm_strpbrk.h +4 -1
data/include/prism/version.h +2 -2
data/include/prism.h +19 -2
data/lib/prism/debug.rb +11 -5
data/lib/prism/desugar_compiler.rb +225 -80
data/lib/prism/dot_visitor.rb +36 -14
data/lib/prism/dsl.rb +302 -299
data/lib/prism/ffi.rb +107 -76
data/lib/prism/lex_compat.rb +17 -1
data/lib/prism/node.rb +4580 -2607
data/lib/prism/node_ext.rb +27 -4
data/lib/prism/parse_result.rb +75 -29
data/lib/prism/serialize.rb +633 -305
data/lib/prism/translation/parser/compiler.rb +1838 -0
data/lib/prism/translation/parser/lexer.rb +335 -0
data/lib/prism/translation/parser/rubocop.rb +45 -0
data/lib/prism/translation/parser.rb +190 -0
data/lib/prism/translation/parser33.rb +12 -0
data/lib/prism/translation/parser34.rb +12 -0
data/lib/prism/translation/ripper.rb +696 -0
data/lib/prism/translation/ruby_parser.rb +1521 -0
data/lib/prism/translation.rb +11 -0
data/lib/prism.rb +1 -1
data/prism.gemspec +18 -7
data/rbi/prism.rbi +150 -88
data/rbi/prism_static.rbi +15 -3
data/sig/prism.rbs +996 -961
data/sig/prism_static.rbs +123 -46
data/src/diagnostic.c +264 -219
data/src/encoding.c +21 -26
data/src/node.c +2 -6
data/src/options.c +29 -5
data/src/prettyprint.c +176 -44
data/src/prism.c +1499 -564
data/src/serialize.c +35 -21
data/src/token_type.c +353 -4
data/src/util/pm_buffer.c +11 -0
data/src/util/pm_constant_pool.c +37 -11
data/src/util/pm_newline_list.c +6 -15
data/src/util/pm_string.c +0 -7
data/src/util/pm_strpbrk.c +122 -14
metadata +16 -5
data/docs/building.md +0 -29
data/lib/prism/ripper_compat.rb +0 -207

data/src/util/pm_newline_list.c CHANGED Viewed

@@ -45,25 +45,13 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
     return true;
 }
-/**
- * Conditionally append a new offset to the newline list, if the value passed in
- * is a newline.
- */
-bool
-pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
-    if (*cursor != '\n') {
-        return true;
-    }
-    return pm_newline_list_append(list, cursor);
-}
 /**
  * Returns the line and column of the given offset. If the offset is not in the
  * list, the line and column of the closest offset less than the given offset
  * are returned.
  */
 pm_line_column_t
-pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
+pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
     assert(cursor >= list->start);
     size_t offset = (size_t) (cursor - list->start);
@@ -74,7 +62,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
         size_t mid = left + (right - left) / 2;
         if (list->offsets[mid] == offset) {
-            return ((pm_line_column_t) { mid, 0 });
+            return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
         }
         if (list->offsets[mid] < offset) {
@@ -84,7 +72,10 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
         }
     }
-    return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
+    return ((pm_line_column_t) {
+        .line = ((int32_t) left) + start_line - 1,
+        .column = (uint32_t) (offset - list->offsets[left - 1])
+    });
 }
 /**

data/src/util/pm_string.c CHANGED Viewed

@@ -65,7 +65,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
     if (file == INVALID_HANDLE_VALUE) {
-        perror("CreateFile failed");
         return false;
     }
@@ -73,7 +72,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     DWORD file_size = GetFileSize(file, NULL);
     if (file_size == INVALID_FILE_SIZE) {
         CloseHandle(file);
-        perror("GetFileSize failed");
         return false;
     }
@@ -90,7 +88,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
     if (mapping == NULL) {
         CloseHandle(file);
-        perror("CreateFileMapping failed");
         return false;
     }
@@ -100,7 +97,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     CloseHandle(file);
     if (source == NULL) {
-        perror("MapViewOfFile failed");
         return false;
     }
@@ -110,7 +106,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     // Open the file for reading
     int fd = open(filepath, O_RDONLY);
     if (fd == -1) {
-        perror("open");
         return false;
     }
@@ -118,7 +113,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     struct stat sb;
     if (fstat(fd, &sb) == -1) {
         close(fd);
-        perror("fstat");
         return false;
     }
@@ -135,7 +129,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
     if (source == MAP_FAILED) {
-        perror("Map failed");
         return false;
     }

data/src/util/pm_strpbrk.c CHANGED Viewed

@@ -1,10 +1,18 @@
 #include "prism/util/pm_strpbrk.h"
 /**
- * This is the slow path that does care about the encoding.
+ * Add an invalid multibyte character error to the parser.
+ */
+static inline void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
+}
+/**
+ * This is the default path.
  */
 static inline const uint8_t *
-pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
     size_t index = 0;
     while (index < maximum) {
@@ -12,22 +20,39 @@ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const ui
             return source + index;
         }
-        size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
-        if (width == 0) {
-            return NULL;
-        }
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
-        index += width;
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+                do {
+                    index++;
+                } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
     }
     return NULL;
 }
 /**
- * This is the fast path that does not care about the encoding.
+ * This is the path when the encoding is ASCII-8BIT.
  */
 static inline const uint8_t *
-pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
     size_t index = 0;
     while (index < maximum) {
@@ -41,6 +66,85 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
     return NULL;
 }
+/**
+ * This is the slow path that does care about the encoding.
+ */
+static inline const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+                do {
+                    index++;
+                } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+    return NULL;
+}
+/**
+ * This is the fast path that does not care about the encoding because we know
+ * the encoding only supports single-byte characters.
+ */
+static inline const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+                do {
+                    index++;
+                } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+    return NULL;
+}
 /**
  * Here we have rolled our own version of strpbrk. The standard library strpbrk
  * has undefined behavior when the source string is not null-terminated. We want
@@ -57,16 +161,20 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
  *
  * Finally, we want to support encodings wherein the charset could contain
  * characters that are trailing bytes of multi-byte characters. For example, in
- * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
  * need to take a slower path and iterate one multi-byte character at a time.
  */
 const uint8_t *
-pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
     if (length <= 0) {
         return NULL;
-    } else if (parser->encoding_changed && parser->encoding->multibyte) {
-        return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
+    } else if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
+    } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
+    } else if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
     } else {
-        return pm_strpbrk_single_byte(source, charset, (size_t) length);
+        return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
     }
 }

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prism
 version: !ruby/object:Gem::Version
-  version: 0.19.0
+  version: 0.24.0
 platform: ruby
 authors:
 - Shopify
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-12-14 00:00:00.000000000 Z
+date: 2024-02-15 00:00:00.000000000 Z
 dependencies: []
 description:
 email:
@@ -26,8 +26,8 @@ files:
 - README.md
 - config.yml
 - docs/build_system.md
-- docs/building.md
 - docs/configuration.md
+- docs/cruby_compilation.md
 - docs/design.md
 - docs/encoding.md
 - docs/fuzzing.md
@@ -35,9 +35,12 @@ files:
 - docs/javascript.md
 - docs/local_variable_depth.md
 - docs/mapping.md
+- docs/parser_translation.md
+- docs/parsing_rules.md
 - docs/releasing.md
 - docs/ripper.md
 - docs/ruby_api.md
+- docs/ruby_parser_translation.md
 - docs/serialization.md
 - docs/testing.md
 - ext/prism/api_node.c
@@ -86,8 +89,16 @@ files:
 - lib/prism/parse_result/comments.rb
 - lib/prism/parse_result/newlines.rb
 - lib/prism/pattern.rb
-- lib/prism/ripper_compat.rb
 - lib/prism/serialize.rb
+- lib/prism/translation.rb
+- lib/prism/translation/parser.rb
+- lib/prism/translation/parser/compiler.rb
+- lib/prism/translation/parser/lexer.rb
+- lib/prism/translation/parser/rubocop.rb
+- lib/prism/translation/parser33.rb
+- lib/prism/translation/parser34.rb
+- lib/prism/translation/ripper.rb
+- lib/prism/translation/ruby_parser.rb
 - lib/prism/visitor.rb
 - prism.gemspec
 - rbi/prism.rbi
@@ -130,7 +141,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.0.0
+      version: 2.7.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="

data/docs/building.md DELETED Viewed

@@ -1,29 +0,0 @@
-# Building
-The following describes how to build prism from source. This comes directly from the [Makefile](../Makefile).
-## Common
-All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
-The following flags should be used to compile prism:
-* `-std=c99` - Use the C99 standard
-* `-Wall -Wconversion -Wextra -Wpedantic -Wundef -Wno-missing-braces` - Enable the warnings we care about
-* `-Werror` - Treat warnings as errors
-* `-fvisibility=hidden` - Hide all symbols by default
-## Shared
-If you want to build prism as a shared library and link against it, you should compile with:
-* `-fPIC -shared` - Compile as a shared library
-* `-DPRISM_EXPORT_SYMBOLS` - Export the symbols (by default nothing is exported)
-## Flags
-`make` respects the `MAKEFLAGS` environment variable. As such, to speed up the build you can run:
-```
-MAKEFLAGS="-j10" bundle exec rake compile
-```

data/lib/prism/ripper_compat.rb DELETED Viewed

@@ -1,207 +0,0 @@
-# frozen_string_literal: true
-require "ripper"
-module Prism
-  # Note: This integration is not finished, and therefore still has many
-  # inconsistencies with Ripper. If you'd like to help out, pull requests would
-  # be greatly appreciated!
-  #
-  # This class is meant to provide a compatibility layer between prism and
-  # Ripper. It functions by parsing the entire tree first and then walking it
-  # and executing each of the Ripper callbacks as it goes.
-  #
-  # This class is going to necessarily be slower than the native Ripper API. It
-  # is meant as a stopgap until developers migrate to using prism. It is also
-  # meant as a test harness for the prism parser.
-  #
-  # To use this class, you treat `Prism::RipperCompat` effectively as you would
-  # treat the `Ripper` class.
-  class RipperCompat < Visitor
-    # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
-    # returns the arrays of [type, *children].
-    class SexpBuilder < RipperCompat
-      private
-      Ripper::PARSER_EVENTS.each do |event|
-        define_method(:"on_#{event}") do |*args|
-          [event, *args]
-        end
-      end
-      Ripper::SCANNER_EVENTS.each do |event|
-        define_method(:"on_#{event}") do |value|
-          [:"@#{event}", value, [lineno, column]]
-        end
-      end
-    end
-    # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
-    # returns the same values as ::Ripper::SexpBuilder except with a couple of
-    # niceties that flatten linked lists into arrays.
-    class SexpBuilderPP < SexpBuilder
-      private
-      def _dispatch_event_new # :nodoc:
-        []
-      end
-      def _dispatch_event_push(list, item) # :nodoc:
-        list << item
-        list
-      end
-      Ripper::PARSER_EVENT_TABLE.each do |event, arity|
-        case event
-        when /_new\z/
-          alias_method :"on_#{event}", :_dispatch_event_new if arity == 0
-        when /_add\z/
-          alias_method :"on_#{event}", :_dispatch_event_push
-        end
-      end
-    end
-    # The source that is being parsed.
-    attr_reader :source
-    # The current line number of the parser.
-    attr_reader :lineno
-    # The current column number of the parser.
-    attr_reader :column
-    # Create a new RipperCompat object with the given source.
-    def initialize(source)
-      @source = source
-      @result = nil
-      @lineno = nil
-      @column = nil
-    end
-    ############################################################################
-    # Public interface
-    ############################################################################
-    # True if the parser encountered an error during parsing.
-    def error?
-      result.failure?
-    end
-    # Parse the source and return the result.
-    def parse
-      result.magic_comments.each do |magic_comment|
-        on_magic_comment(magic_comment.key, magic_comment.value)
-      end
-      if error?
-        result.errors.each do |error|
-          on_parse_error(error.message)
-        end
-      else
-        result.value.accept(self)
-      end
-    end
-    ############################################################################
-    # Visitor methods
-    ############################################################################
-    # Visit a CallNode node.
-    def visit_call_node(node)
-      if !node.message.match?(/^[[:alpha:]_]/) && node.opening_loc.nil? && node.arguments&.arguments&.length == 1
-        left = visit(node.receiver)
-        right = visit(node.arguments.arguments.first)
-        bounds(node.location)
-        on_binary(left, node.name, right)
-      else
-        raise NotImplementedError
-      end
-    end
-    # Visit a FloatNode node.
-    def visit_float_node(node)
-      bounds(node.location)
-      on_float(node.slice)
-    end
-    # Visit a ImaginaryNode node.
-    def visit_imaginary_node(node)
-      bounds(node.location)
-      on_imaginary(node.slice)
-    end
-    # Visit an IntegerNode node.
-    def visit_integer_node(node)
-      bounds(node.location)
-      on_int(node.slice)
-    end
-    # Visit a RationalNode node.
-    def visit_rational_node(node)
-      bounds(node.location)
-      on_rational(node.slice)
-    end
-    # Visit a StatementsNode node.
-    def visit_statements_node(node)
-      bounds(node.location)
-      node.body.inject(on_stmts_new) do |stmts, stmt|
-        on_stmts_add(stmts, visit(stmt))
-      end
-    end
-    # Visit a ProgramNode node.
-    def visit_program_node(node)
-      statements = visit(node.statements)
-      bounds(node.location)
-      on_program(statements)
-    end
-    ############################################################################
-    # Entrypoints for subclasses
-    ############################################################################
-    # This is a convenience method that runs the SexpBuilder subclass parser.
-    def self.sexp_raw(source)
-      SexpBuilder.new(source).parse
-    end
-    # This is a convenience method that runs the SexpBuilderPP subclass parser.
-    def self.sexp(source)
-      SexpBuilderPP.new(source).parse
-    end
-    private
-    # This method is responsible for updating lineno and column information
-    # to reflect the current node.
-    #
-    # This method could be drastically improved with some caching on the start
-    # of every line, but for now it's good enough.
-    def bounds(location)
-      @lineno = location.start_line
-      @column = location.start_column
-    end
-    # Lazily initialize the parse result.
-    def result
-      @result ||= Prism.parse(source)
-    end
-    def _dispatch0; end # :nodoc:
-    def _dispatch1(_); end # :nodoc:
-    def _dispatch2(_, _); end # :nodoc:
-    def _dispatch3(_, _, _); end # :nodoc:
-    def _dispatch4(_, _, _, _); end # :nodoc:
-    def _dispatch5(_, _, _, _, _); end # :nodoc:
-    def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
-    alias_method :on_parse_error, :_dispatch1
-    alias_method :on_magic_comment, :_dispatch2
-    (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
-      alias_method :"on_#{event}", :"_dispatch#{arity}"
-    end
-  end
-end