RubyGems - yarp - Versions diffs - 0.6.0 - Mend

yarp 0.6.0

Files changed (82) hide show

checksums.yaml +7 -0
data/CODE_OF_CONDUCT.md +76 -0
data/CONTRIBUTING.md +51 -0
data/LICENSE.md +7 -0
data/Makefile.in +79 -0
data/README.md +86 -0
data/config.h.in +25 -0
data/config.yml +2147 -0
data/configure +4487 -0
data/docs/build_system.md +85 -0
data/docs/building.md +26 -0
data/docs/configuration.md +56 -0
data/docs/design.md +53 -0
data/docs/encoding.md +116 -0
data/docs/extension.md +20 -0
data/docs/fuzzing.md +93 -0
data/docs/heredocs.md +36 -0
data/docs/mapping.md +117 -0
data/docs/ripper.md +36 -0
data/docs/serialization.md +130 -0
data/docs/testing.md +55 -0
data/ext/yarp/api_node.c +3680 -0
data/ext/yarp/api_pack.c +256 -0
data/ext/yarp/extconf.rb +131 -0
data/ext/yarp/extension.c +547 -0
data/ext/yarp/extension.h +18 -0
data/include/yarp/ast.h +1412 -0
data/include/yarp/defines.h +54 -0
data/include/yarp/diagnostic.h +24 -0
data/include/yarp/enc/yp_encoding.h +94 -0
data/include/yarp/node.h +36 -0
data/include/yarp/pack.h +141 -0
data/include/yarp/parser.h +389 -0
data/include/yarp/regexp.h +19 -0
data/include/yarp/unescape.h +42 -0
data/include/yarp/util/yp_buffer.h +39 -0
data/include/yarp/util/yp_char.h +75 -0
data/include/yarp/util/yp_constant_pool.h +64 -0
data/include/yarp/util/yp_list.h +67 -0
data/include/yarp/util/yp_memchr.h +14 -0
data/include/yarp/util/yp_newline_list.h +54 -0
data/include/yarp/util/yp_state_stack.h +24 -0
data/include/yarp/util/yp_string.h +57 -0
data/include/yarp/util/yp_string_list.h +28 -0
data/include/yarp/util/yp_strpbrk.h +29 -0
data/include/yarp/version.h +5 -0
data/include/yarp.h +69 -0
data/lib/yarp/lex_compat.rb +759 -0
data/lib/yarp/node.rb +7428 -0
data/lib/yarp/pack.rb +185 -0
data/lib/yarp/ripper_compat.rb +174 -0
data/lib/yarp/serialize.rb +389 -0
data/lib/yarp.rb +330 -0
data/src/diagnostic.c +25 -0
data/src/enc/yp_big5.c +79 -0
data/src/enc/yp_euc_jp.c +85 -0
data/src/enc/yp_gbk.c +88 -0
data/src/enc/yp_shift_jis.c +83 -0
data/src/enc/yp_tables.c +509 -0
data/src/enc/yp_unicode.c +2320 -0
data/src/enc/yp_windows_31j.c +83 -0
data/src/node.c +2011 -0
data/src/pack.c +493 -0
data/src/prettyprint.c +1782 -0
data/src/regexp.c +580 -0
data/src/serialize.c +1576 -0
data/src/token_type.c +347 -0
data/src/unescape.c +576 -0
data/src/util/yp_buffer.c +78 -0
data/src/util/yp_char.c +229 -0
data/src/util/yp_constant_pool.c +147 -0
data/src/util/yp_list.c +50 -0
data/src/util/yp_memchr.c +31 -0
data/src/util/yp_newline_list.c +119 -0
data/src/util/yp_state_stack.c +25 -0
data/src/util/yp_string.c +207 -0
data/src/util/yp_string_list.c +32 -0
data/src/util/yp_strncasecmp.c +20 -0
data/src/util/yp_strpbrk.c +66 -0
data/src/yarp.c +13211 -0
data/yarp.gemspec +100 -0
metadata +125 -0

data/lib/yarp.rb ADDED Viewed

@@ -0,0 +1,330 @@
+# frozen_string_literal: true
+module YARP
+  # This represents a source of Ruby code that has been parsed. It is used in
+  # conjunction with locations to allow them to resolve line numbers and source
+  # ranges.
+  class Source
+    attr_reader :source, :offsets
+    def initialize(source, offsets)
+      @source = source
+      @offsets = offsets
+    end
+    def slice(offset, length)
+      source.byteslice(offset, length)
+    end
+    def line(value)
+      offsets.bsearch_index { |offset| offset > value } || offsets.length
+    end
+    def column(value)
+      value - offsets[line(value) - 1]
+    end
+  end
+  # This represents a location in the source.
+  class Location
+    # A Source object that is used to determine more information from the given
+    # offset and length.
+    private attr_reader :source
+    # The byte offset from the beginning of the source where this location
+    # starts.
+    attr_reader :start_offset
+    # The length of this location in bytes.
+    attr_reader :length
+    def initialize(source, start_offset, length)
+      @source = source
+      @start_offset = start_offset
+      @length = length
+    end
+    def inspect
+      "#<YARP::Location @start_offset=#{@start_offset} @length=#{@length}>"
+    end
+    # The source code that this location represents.
+    def slice
+      source.slice(start_offset, length)
+    end
+    # The byte offset from the beginning of the source where this location ends.
+    def end_offset
+      start_offset + length
+    end
+    # The line number where this location starts.
+    def start_line
+      source.line(start_offset)
+    end
+    # The line number where this location ends.
+    def end_line
+      source.line(end_offset - 1)
+    end
+    # The column number in bytes where this location starts from the start of
+    # the line.
+    def start_column
+      source.column(start_offset)
+    end
+    # The column number in bytes where this location ends from the start of the
+    # line.
+    def end_column
+      source.column(end_offset - 1)
+    end
+    def deconstruct_keys(keys)
+      { start_offset: start_offset, end_offset: end_offset }
+    end
+    def pretty_print(q)
+      q.text("(#{start_offset}...#{end_offset})")
+    end
+    def ==(other)
+      other.is_a?(Location) &&
+        other.start_offset == start_offset &&
+        other.end_offset == end_offset
+    end
+    def self.null
+      new(0, 0)
+    end
+  end
+  # This represents a comment that was encountered during parsing.
+  class Comment
+    attr_reader :type, :location
+    def initialize(type, location)
+      @type = type
+      @location = location
+    end
+    def deconstruct_keys(keys)
+      { type: type, location: location }
+    end
+  end
+  # This represents an error that was encountered during parsing.
+  class ParseError
+    attr_reader :message, :location
+    def initialize(message, location)
+      @message = message
+      @location = location
+    end
+    def deconstruct_keys(keys)
+      { message: message, location: location }
+    end
+  end
+  # This represents a warning that was encountered during parsing.
+  class ParseWarning
+    attr_reader :message, :location
+    def initialize(message, location)
+      @message = message
+      @location = location
+    end
+    def deconstruct_keys(keys)
+      { message: message, location: location }
+    end
+  end
+  # A class that knows how to walk down the tree. None of the individual visit
+  # methods are implemented on this visitor, so it forces the consumer to
+  # implement each one that they need. For a default implementation that
+  # continues walking the tree, see the Visitor class.
+  class BasicVisitor
+    def visit(node)
+      node&.accept(self)
+    end
+    def visit_all(nodes)
+      nodes.map { |node| visit(node) }
+    end
+    def visit_child_nodes(node)
+      visit_all(node.child_nodes)
+    end
+  end
+  # Visitor adds nothing to BasicVisitor in this file. The BasicVisitor comment
+  # describes it as the default implementation that keeps walking the tree, so
+  # the visit_* methods are presumably added when the class is reopened
+  # elsewhere (likely yarp/node, required at the bottom of this file) - TODO
+  # confirm.
+  class Visitor < BasicVisitor
+  end
+  # This represents the result of a call to ::parse or ::parse_file. It contains
+  # the AST, any comments that were encounters, and any errors that were
+  # encountered.
+  class ParseResult
+    attr_reader :value, :comments, :errors, :warnings, :source
+    def initialize(value, comments, errors, warnings, source)
+      @value = value
+      @comments = comments
+      @errors = errors
+      @warnings = warnings
+      @source = source
+    end
+    def deconstruct_keys(keys)
+      { value: value, comments: comments, errors: errors, warnings: warnings }
+    end
+    def success?
+      errors.empty?
+    end
+    def failure?
+      !success?
+    end
+    # Keep in sync with Java MarkNewlinesVisitor
+    class MarkNewlinesVisitor < YARP::Visitor
+      def initialize(newline_marked)
+        @newline_marked = newline_marked
+      end
+      def visit_block_node(node)
+        old_newline_marked = @newline_marked
+        @newline_marked = Array.new(old_newline_marked.size, false)
+        begin
+          super(node)
+        ensure
+          @newline_marked = old_newline_marked
+        end
+      end
+      alias_method :visit_lambda_node, :visit_block_node
+      def visit_if_node(node)
+        node.set_newline_flag(@newline_marked)
+        super(node)
+      end
+      alias_method :visit_unless_node, :visit_if_node
+      def visit_statements_node(node)
+        node.body.each do |child|
+          child.set_newline_flag(@newline_marked)
+        end
+        super(node)
+      end
+    end
+    private_constant :MarkNewlinesVisitor
+    def mark_newlines
+      newline_marked = Array.new(1 + @source.offsets.size, false)
+      visitor = MarkNewlinesVisitor.new(newline_marked)
+      value.accept(visitor)
+      value
+    end
+  end
+  # This represents a token from the Ruby source.
+  class Token
+    attr_reader :type, :value, :location
+    def initialize(type, value, location)
+      @type = type
+      @value = value
+      @location = location
+    end
+    def deconstruct_keys(keys)
+      { type: type, value: value, location: location }
+    end
+    def pretty_print(q)
+      q.group do
+        q.text(type.to_s)
+        self.location.pretty_print(q)
+        q.text("(")
+        q.nest(2) do
+          q.breakable("")
+          q.pp(value)
+        end
+        q.breakable("")
+        q.text(")")
+      end
+    end
+    def ==(other)
+      other.is_a?(Token) &&
+        other.type == type &&
+        other.value == value
+    end
+  end
+  # This represents a node in the tree.
+  class Node
+    attr_reader :location
+    def newline?
+      @newline ? true : false
+    end
+    def set_newline_flag(newline_marked)
+      line = location.start_line
+      unless newline_marked[line]
+        newline_marked[line] = true
+        @newline = true
+      end
+    end
+    def pretty_print(q)
+      q.group do
+        q.text(self.class.name.split("::").last)
+        location.pretty_print(q)
+        q.text("[Li:#{location.start_line}]") if newline?
+        q.text("(")
+        q.nest(2) do
+          deconstructed = deconstruct_keys([])
+          deconstructed.delete(:location)
+          q.breakable("")
+          q.seplist(deconstructed, lambda { q.comma_breakable }, :each_value) { |value| q.pp(value) }
+        end
+        q.breakable("")
+        q.text(")")
+      end
+    end
+  end
+  # Load the serialized AST using the source as a reference into a tree.
+  # This is a thin wrapper: both arguments are handed unchanged to
+  # Serialize.load and its result is returned as-is. Serialize is not defined
+  # in this file; presumably it comes from yarp/serialize, required at the
+  # bottom of this file - confirm.
+  def self.load(source, serialized)
+    Serialize.load(source, serialized)
+  end
+  # This module is used for testing and debugging and is not meant to be used by
+  # consumers of this library.
+  module Debug
+    def self.newlines(source)
+      YARP.parse(source).source.offsets
+    end
+    def self.parse_serialize_file(filepath)
+      parse_serialize_file_metadata(filepath, [filepath.bytesize, filepath.b, 0].pack("LA*L"))
+    end
+  end
+  # Marking this as private so that consumers don't see it. It makes it a little
+  # annoying for testing since you have to const_get it to access the methods,
+  # but at least this way it's clear it's not meant for consumers.
+  private_constant :Debug
+end
+require_relative "yarp/lex_compat"
+require_relative "yarp/node"
+require_relative "yarp/ripper_compat"
+require_relative "yarp/serialize"
+require_relative "yarp/pack"
+require "yarp/yarp"

data/src/diagnostic.c ADDED Viewed

@@ -0,0 +1,25 @@
+#include "yarp/diagnostic.h"
+// Append a diagnostic to the given list. Returns false, leaving the list
+// untouched, when the allocation fails, and true once the entry is appended.
+// The start, end and message pointers are stored as given; message is not
+// copied. Every other field of the entry is zero-initialized.
+bool
+yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message) {
+    yp_diagnostic_t *entry = (yp_diagnostic_t *) malloc(sizeof(*entry));
+    if (entry == NULL) {
+        return false;
+    }
+    *entry = (yp_diagnostic_t) { .start = start, .end = end, .message = message };
+    // The cast presumably relies on yp_diagnostic_t beginning with its list
+    // node - confirm in yarp/diagnostic.h.
+    yp_list_append(list, (yp_list_node_t *) entry);
+    return true;
+}
+// Deallocate the internal state of the given diagnostic list. Every node is
+// freed; the list structure itself is not modified, so list->head still holds
+// the old pointer afterwards.
+void
+yp_diagnostic_list_free(yp_list_t *list) {
+    yp_list_node_t *current = list->head;
+    while (current != NULL) {
+        // Read the link before the node is released.
+        yp_list_node_t *following = current->next;
+        free((yp_diagnostic_t *) current);
+        current = following;
+    }
+}

data/src/enc/yp_big5.c ADDED Viewed

@@ -0,0 +1,79 @@
+#include "yarp/enc/yp_encoding.h"
+typedef uint16_t yp_big5_codepoint_t;
+// Decode the character starting at c, looking at no more than n bytes. On
+// return *width is 1 for a byte below 0x80, 2 for a lead byte in 0xA1-0xFE
+// followed by a trail byte in 0x40-0xFE, and 0 when neither form matches. The
+// return value is the byte itself, the two bytes packed as (lead << 8 | trail),
+// or 0 respectively. The first byte is read before n is consulted.
+static yp_big5_codepoint_t
+yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
+    const unsigned char *bytes = (const unsigned char *) c;
+    const unsigned char lead = bytes[0];
+    // These are the single byte characters.
+    if (lead < 0x80) {
+        *width = 1;
+        return lead;
+    }
+    // These are the double byte characters. The trail byte is only read once
+    // n says it is available.
+    if (n > 1 && lead >= 0xA1 && lead <= 0xFE) {
+        const unsigned char trail = bytes[1];
+        if (trail >= 0x40 && trail <= 0xFE) {
+            *width = 2;
+            return (yp_big5_codepoint_t) (lead << 8 | trail);
+        }
+    }
+    *width = 0;
+    return 0;
+}
+// Width in bytes of the big5 character at c as reported by yp_big5_codepoint:
+// 1, 2, or 0 when the bytes are not recognised. The decoded value is dropped.
+static size_t
+yp_encoding_big5_char_width(const char *c, ptrdiff_t n) {
+    size_t consumed;
+    (void) yp_big5_codepoint(c, n, &consumed);
+    return consumed;
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alpha_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alpha_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alnum_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_big5_alnum_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alnum_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_isupper_char and that result is returned unchanged. Double
+// byte and unrecognised input yield false.
+static bool
+yp_encoding_big5_isupper_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
+    if (width != 1) return false;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_isupper_char(&single, n);
+}
+// Encoding descriptor that ties the big5 callbacks defined in this file to the
+// name "big5" and marks the encoding as multibyte.
+yp_encoding_t yp_encoding_big5 = {
+    .name = "big5",
+    .multibyte = true,
+    .char_width = yp_encoding_big5_char_width,
+    .alpha_char = yp_encoding_big5_alpha_char,
+    .alnum_char = yp_encoding_big5_alnum_char,
+    .isupper_char = yp_encoding_big5_isupper_char
+};

data/src/enc/yp_euc_jp.c ADDED Viewed

@@ -0,0 +1,85 @@
+#include "yarp/enc/yp_encoding.h"
+typedef uint16_t yp_euc_jp_codepoint_t;
+// Decode the character starting at c, looking at no more than n bytes. On
+// return *width is 1 for a byte below 0x80, 2 for a lead byte that is 0x8E or
+// in 0xA1-0xFE followed by a trail byte in 0xA1-0xFE, and 0 when neither form
+// matches. The return value is the byte itself, the two bytes packed as
+// (lead << 8 | trail), or 0 respectively. The first byte is read before n is
+// consulted.
+static yp_euc_jp_codepoint_t
+yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
+    const unsigned char *bytes = (const unsigned char *) c;
+    const unsigned char lead = bytes[0];
+    // These are the single byte characters.
+    if (lead < 0x80) {
+        *width = 1;
+        return lead;
+    }
+    // These are the double byte characters. The trail byte is only read once
+    // n says it is available.
+    if (n > 1) {
+        const unsigned char trail = bytes[1];
+        const bool lead_ok = (lead == 0x8E) || (lead >= 0xA1 && lead <= 0xFE);
+        const bool trail_ok = (trail >= 0xA1 && trail <= 0xFE);
+        if (lead_ok && trail_ok) {
+            *width = 2;
+            return (yp_euc_jp_codepoint_t) (lead << 8 | trail);
+        }
+    }
+    *width = 0;
+    return 0;
+}
+// Width in bytes of the euc-jp character at c as reported by
+// yp_euc_jp_codepoint: 1, 2, or 0 when the bytes are not recognised. The
+// decoded value is dropped.
+static size_t
+yp_encoding_euc_jp_char_width(const char *c, ptrdiff_t n) {
+    size_t consumed;
+    (void) yp_euc_jp_codepoint(c, n, &consumed);
+    return consumed;
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alpha_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alpha_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alnum_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_euc_jp_alnum_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alnum_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_isupper_char and that result is returned unchanged. Double
+// byte and unrecognised input yield false. The fallback is spelled false
+// rather than 0 to match the bool return type and the big5 and gbk
+// counterparts.
+static bool
+yp_encoding_euc_jp_isupper_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
+    if (width == 1) {
+        const char value = (const char) codepoint;
+        return yp_encoding_ascii_isupper_char(&value, n);
+    } else {
+        return false;
+    }
+}
+// Encoding descriptor that ties the euc-jp callbacks defined in this file to
+// the name "euc-jp" and marks the encoding as multibyte.
+yp_encoding_t yp_encoding_euc_jp = {
+    .name = "euc-jp",
+    .multibyte = true,
+    .char_width = yp_encoding_euc_jp_char_width,
+    .alpha_char = yp_encoding_euc_jp_alpha_char,
+    .alnum_char = yp_encoding_euc_jp_alnum_char,
+    .isupper_char = yp_encoding_euc_jp_isupper_char
+};

data/src/enc/yp_gbk.c ADDED Viewed

@@ -0,0 +1,88 @@
+#include "yarp/enc/yp_encoding.h"
+typedef uint16_t yp_gbk_codepoint_t;
+// Decode the character starting at c, looking at no more than n bytes. On
+// return *width is 1 for a byte below 0x80, 2 when the first two bytes fall in
+// one of the five lead/trail ranges tagged GBK/1 to GBK/5 below, and 0 when
+// neither form matches. The return value is the byte itself, the two bytes
+// packed as (lead << 8 | trail), or 0 respectively. The first byte is read
+// before n is consulted.
+static yp_gbk_codepoint_t
+yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
+    const unsigned char *bytes = (const unsigned char *) c;
+    const unsigned char lead = bytes[0];
+    // These are the single byte characters.
+    if (lead < 0x80) {
+        *width = 1;
+        return lead;
+    }
+    // These are the double byte characters. The trail byte is only read once
+    // n says it is available.
+    if (n > 1) {
+        const unsigned char trail = bytes[1];
+        // The three trail byte ranges the five cases share; 0x7F is excluded
+        // from the two that start at 0x40.
+        const bool trail_a1_fe = (trail >= 0xA1 && trail <= 0xFE);
+        const bool trail_40_fe = (trail >= 0x40 && trail <= 0xFE) && (trail != 0x7F);
+        const bool trail_40_a0 = (trail >= 0x40 && trail <= 0xA0) && (trail != 0x7F);
+        if (
+            ((lead >= 0xA1 && lead <= 0xA9) && trail_a1_fe) || // GBK/1
+            ((lead >= 0xB0 && lead <= 0xF7) && trail_a1_fe) || // GBK/2
+            ((lead >= 0x81 && lead <= 0xA0) && trail_40_fe) || // GBK/3
+            ((lead >= 0xAA && lead <= 0xFE) && trail_40_a0) || // GBK/4
+            ((lead >= 0xA8 && lead <= 0xA9) && trail_40_a0) // GBK/5
+        ) {
+            *width = 2;
+            return (yp_gbk_codepoint_t) (lead << 8 | trail);
+        }
+    }
+    *width = 0;
+    return 0;
+}
+// Width in bytes of the gbk character at c as reported by yp_gbk_codepoint:
+// 1, 2, or 0 when the bytes are not recognised. The decoded value is dropped.
+static size_t
+yp_encoding_gbk_char_width(const char *c, ptrdiff_t n) {
+    size_t consumed;
+    (void) yp_gbk_codepoint(c, n, &consumed);
+    return consumed;
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alpha_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alpha_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alnum_char and that result is returned unchanged. Double
+// byte and unrecognised input yield 0.
+static size_t
+yp_encoding_gbk_alnum_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alnum_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_isupper_char and that result is returned unchanged. Double
+// byte and unrecognised input yield false.
+static bool
+yp_encoding_gbk_isupper_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
+    if (width != 1) return false;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_isupper_char(&single, n);
+}
+// Encoding descriptor that ties the gbk callbacks defined in this file to the
+// name "gbk" and marks the encoding as multibyte.
+yp_encoding_t yp_encoding_gbk = {
+    .name = "gbk",
+    .multibyte = true,
+    .char_width = yp_encoding_gbk_char_width,
+    .alpha_char = yp_encoding_gbk_alpha_char,
+    .alnum_char = yp_encoding_gbk_alnum_char,
+    .isupper_char = yp_encoding_gbk_isupper_char
+};

data/src/enc/yp_shift_jis.c ADDED Viewed

@@ -0,0 +1,83 @@
+#include "yarp/enc/yp_encoding.h"
+typedef uint16_t yp_shift_jis_codepoint_t;
+// Decode the character starting at c, looking at no more than n bytes. On
+// return *width is 1 for a byte below 0x80 or in 0xA1-0xDF, 2 for a lead byte
+// in 0x81-0x9F or 0xE0-0xFC followed by a trail byte in 0x40-0xFC, and 0 when
+// neither form matches. The return value is the byte itself, the two bytes
+// packed as (lead << 8 | trail), or 0 respectively. The first byte is read
+// before n is consulted.
+static yp_shift_jis_codepoint_t
+yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
+    const unsigned char *bytes = (const unsigned char *) c;
+    const unsigned char lead = bytes[0];
+    // These are the single byte characters.
+    if (lead < 0x80 || (lead >= 0xA1 && lead <= 0xDF)) {
+        *width = 1;
+        return lead;
+    }
+    // These are the double byte characters. The trail byte is only read once
+    // n says it is available.
+    if (n > 1) {
+        const unsigned char trail = bytes[1];
+        const bool lead_ok = (lead >= 0x81 && lead <= 0x9F) || (lead >= 0xE0 && lead <= 0xFC);
+        const bool trail_ok = (trail >= 0x40 && trail <= 0xFC);
+        if (lead_ok && trail_ok) {
+            *width = 2;
+            return (yp_shift_jis_codepoint_t) (lead << 8 | trail);
+        }
+    }
+    *width = 0;
+    return 0;
+}
+// Width in bytes of the shift_jis character at c as reported by
+// yp_shift_jis_codepoint: 1, 2, or 0 when the bytes are not recognised. The
+// decoded value is dropped.
+static size_t
+yp_encoding_shift_jis_char_width(const char *c, ptrdiff_t n) {
+    size_t consumed;
+    (void) yp_shift_jis_codepoint(c, n, &consumed);
+    return consumed;
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alpha_char and that result is returned unchanged. That
+// includes single bytes in 0xA1-0xDF, which are passed along after the cast to
+// char. Double byte and unrecognised input yield 0.
+static size_t
+yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alpha_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_alnum_char and that result is returned unchanged. That
+// includes single bytes in 0xA1-0xDF, which are passed along after the cast to
+// char. Double byte and unrecognised input yield 0.
+static size_t
+yp_encoding_shift_jis_alnum_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
+    if (width != 1) return 0;
+    const char single = (const char) codepoint;
+    return yp_encoding_ascii_alnum_char(&single, n);
+}
+// Only single byte characters are classified: the byte is handed to
+// yp_encoding_ascii_isupper_char and that result is returned unchanged. That
+// includes single bytes in 0xA1-0xDF, which are passed along after the cast to
+// char. Double byte and unrecognised input yield false. The fallback is
+// spelled false rather than 0 to match the bool return type and the big5 and
+// gbk counterparts.
+static bool
+yp_encoding_shift_jis_isupper_char(const char *c, ptrdiff_t n) {
+    size_t width;
+    yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
+    if (width == 1) {
+        const char value = (const char) codepoint;
+        return yp_encoding_ascii_isupper_char(&value, n);
+    } else {
+        return false;
+    }
+}
+// Encoding descriptor that ties the shift_jis callbacks defined in this file
+// to the name "shift_jis" and marks the encoding as multibyte.
+yp_encoding_t yp_encoding_shift_jis = {
+    .name = "shift_jis",
+    .multibyte = true,
+    .char_width = yp_encoding_shift_jis_char_width,
+    .alpha_char = yp_encoding_shift_jis_alpha_char,
+    .alnum_char = yp_encoding_shift_jis_alnum_char,
+    .isupper_char = yp_encoding_shift_jis_isupper_char
+};