RubyGems - marshal-parser - Versions diffs - 0.1.0 - Mend

marshal-parser 0.1.0

Files changed (26) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +5 -0
data/CODE_OF_CONDUCT.md +84 -0
data/LICENSE.txt +21 -0
data/README.md +202 -0
data/bin/console +15 -0
data/bin/marshal-cli +6 -0
data/bin/setup +8 -0
data/lib/marshal-parser/assertable.rb +9 -0
data/lib/marshal-parser/cli/commands.rb +108 -0
data/lib/marshal-parser/formatters/ast/only_tokens.rb +60 -0
data/lib/marshal-parser/formatters/ast/renderers/entries_block.rb +17 -0
data/lib/marshal-parser/formatters/ast/renderers/line.rb +17 -0
data/lib/marshal-parser/formatters/ast/renderers/line_with_annotation.rb +18 -0
data/lib/marshal-parser/formatters/ast/renderers/renderer.rb +38 -0
data/lib/marshal-parser/formatters/ast/renderers/renderer_with_annotations.rb +52 -0
data/lib/marshal-parser/formatters/ast/sexpression.rb +82 -0
data/lib/marshal-parser/formatters/ast/sexpression_compact.rb +90 -0
data/lib/marshal-parser/formatters/symbols/table.rb +19 -0
data/lib/marshal-parser/formatters/tokens/one_line.rb +21 -0
data/lib/marshal-parser/formatters/tokens/with_description.rb +61 -0
data/lib/marshal-parser/lexer.rb +331 -0
data/lib/marshal-parser/parser.rb +880 -0
data/lib/marshal-parser/version.rb +5 -0
data/lib/marshal-parser.rb +22 -0
metadata +85 -0

data/lib/marshal-parser/formatters/ast/sexpression.rb ADDED Viewed

@@ -0,0 +1,82 @@
+# frozen_string_literal: true
module MarshalParser
  module Formatters
    module AST
      # Formats a parsed Marshal AST as a verbose S-expression: every node
      # becomes "(node-name ...)" and every attribute of a node is spelled out
      # as a nested "(attribute-name value)" entry.
      class SExpression
        # @param node [Parser::Node] root of the tree to format
        # @param source_string [String] the dump that token index/length refer to
        # @param renderer [#render] object that turns a Renderers::EntriesBlock
        #   into the final output
        def initialize(node, source_string, renderer)
          @node = node
          @source_string = source_string
          @renderer = renderer
        end

        # @return whatever the renderer produces for the root entries block
        def string
          @renderer.render(Renderers::EntriesBlock.new(node_to_entries(@node)))
        end

        private

        # Converts +node+ (recursively) into a flat pair
        # [header line, block of nested entries], or just [header line] when the
        # node has nothing to show inside.
        def node_to_entries(node)
          attributes = node.attributes

          # Tokens without a registered attribute (e.g. type markers) are
          # skipped; child nodes are expanded in place.
          child_entries = node.child_entities.flat_map do |entity|
            case entity
            when Lexer::Token
              attributes.key?(entity) ? [attribute_line(attributes[entity])] : []
            when Parser::Node
              node_to_entries(entity)
            else
              []
            end
          end

          # Unary plus guarantees a mutable string: close_bracket appends to it,
          # and interpolated literals are frozen on Ruby < 3.0 under
          # frozen_string_literal.
          entries = [Renderers::Line.new(+"(#{node_to_name(node)}"), *child_entries]
          close_bracket(entries.last)

          if node.is_a?(Parser::Annotatable)
            entries[0] = Renderers::LineAnnotated.new(entries[0].string, node.annotation)
          end

          return entries if entries.size == 1

          [entries.first, Renderers::EntriesBlock.new(entries.drop(1))]
        end

        # Builds the "(name value)" line for one attribute.
        # When the value is itself a token, its raw bytes from the source dump
        # are shown (escaped with String#dump) - the same rule
        # SExpressionCompact applies.
        def attribute_line(options)
          name = options[:name].to_s.tr("_", "-")
          value = options[:value]
          value = @source_string[value.index, value.length].dump if value.is_a?(Lexer::Token)

          Renderers::Line.new(+"(#{name} #{value})")
        end

        # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
        def node_to_name(node)
          node.class.name.to_s
              .split("::").last
              .sub(/Node\Z/, "")
              .gsub(/([a-z])([A-Z])/, '\1-\2')
              .downcase
        end

        # Appends ")" to the very last line reachable from +entry+, descending
        # into nested blocks.
        # NOTE(review): entries that are neither Line nor EntriesBlock are left
        # untouched - presumably LineAnnotated is a Line subclass; confirm.
        def close_bracket(entry)
          case entry
          when Renderers::Line
            entry.string << ")"
          when Renderers::EntriesBlock
            close_bracket(entry.entries.last)
          end
        end
      end
    end
  end
end

data/lib/marshal-parser/formatters/ast/sexpression_compact.rb ADDED Viewed

@@ -0,0 +1,90 @@
+# frozen_string_literal: true
module MarshalParser
  module Formatters
    module AST
      # Formats a parsed Marshal AST as a compact S-expression: only child nodes
      # and a node's literal value are shown, a node with nothing but a literal
      # collapses to a single "name literal" line, and nodes reporting
      # always_leaf? are printed without brackets.
      class SExpressionCompact
        # @param node [Parser::Node] root of the tree to format
        # @param source_string [String] the dump that token index/length refer to
        # @param renderer [#render] object that turns a Renderers::EntriesBlock
        #   into the final output
        def initialize(node, source_string, renderer)
          @node = node
          @source_string = source_string
          @renderer = renderer
        end

        # @return whatever the renderer produces for the root entries block
        def string
          @renderer.render(Renderers::EntriesBlock.new(node_to_entries(@node)))
        end

        private

        # Converts +node+ (recursively) into [header line] or
        # [header line, block of nested entries].
        def node_to_entries(node)
          literal_token = node.literal_token

          child_entries = node.child_entities.flat_map do |entity|
            if entity.is_a?(Parser::Node)
              node_to_entries(entity)
            elsif entity == literal_token
              [literal_line(node, literal_token)]
            else
              []
            end
          end

          entries = [Renderers::Line.new(node_to_name(node)), *child_entries]

          # "name" + single literal line => one "name literal" line
          if literal_token && entries.size == 2 && entries.all?(Renderers::Line)
            entries = [Renderers::Line.new(entries.map(&:string).join(" "))]
          end

          unless node.always_leaf?
            # Unary plus guarantees a mutable string (interpolated literals are
            # frozen on Ruby < 3.0 under frozen_string_literal); close_bracket
            # appends to it when it is also the last entry.
            entries[0] = Renderers::Line.new(+"(#{entries[0].string}")
            close_bracket(entries.last)
          end

          if node.is_a?(Parser::Annotatable)
            entries[0] = Renderers::LineAnnotated.new(entries[0].string, node.annotation)
          end

          return entries if entries.size == 1

          [entries.first, Renderers::EntriesBlock.new(entries.drop(1))]
        end

        # Builds the line showing the node's literal value. A token value is
        # shown as its raw bytes from the source dump (escaped with
        # String#dump); anything else via #to_s.
        def literal_line(node, literal_token)
          value = node.attributes[literal_token][:value]
          content =
            if value.is_a?(Lexer::Token)
              @source_string[value.index, value.length].dump
            else
              # #to_s may hand back the attribute's own String or a frozen one;
              # copy it so close_bracket never mutates (or fails on) it.
              value.to_s.dup
            end

          Renderers::Line.new(content)
        end

        # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
        def node_to_name(node)
          node.class.name.to_s
              .split("::").last
              .sub(/Node\Z/, "")
              .gsub(/([a-z])([A-Z])/, '\1-\2')
              .downcase
        end

        # Appends ")" to the very last line reachable from +entry+, descending
        # into nested blocks.
        # NOTE(review): entries that are neither Line nor EntriesBlock are left
        # untouched - presumably LineAnnotated is a Line subclass; confirm.
        def close_bracket(entry)
          case entry
          when Renderers::Line
            entry.string << ")"
          when Renderers::EntriesBlock
            close_bracket(entry.entries.last)
          end
        end
      end
    end
  end
end

data/lib/marshal-parser/formatters/symbols/table.rb ADDED Viewed

@@ -0,0 +1,19 @@
+# frozen_string_literal: true
module MarshalParser
  module Formatters
    module Symbols
      # Formats a list of symbols as a numbered table, one symbol per line.
      class Table
        # @param symbols [Enumerable] symbols in the order they should be numbered
        def initialize(symbols)
          @symbols = symbols
        end

        # @return [String] lines of the form "<index> - :<symbol>" joined with
        #   newlines; the index is left-aligned in a 4-character column
        def string
          rows = @symbols.each_with_index.map do |name, position|
            format("%-4d - :%s", position, name)
          end

          rows.join("\n")
        end
      end
    end
  end
end

data/lib/marshal-parser/formatters/tokens/one_line.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# frozen_string_literal: true
module MarshalParser
  module Formatters
    module Tokens
      # Formats a token list as a single line: the source bytes of every token,
      # separated by spaces.
      class OneLine
        NON_PRINTABLE = /[^[:print:]]/.freeze
        private_constant :NON_PRINTABLE

        # @param tokens [Array<#index, #length>] tokens in source order
        # @param source_string [String] the dump that token index/length refer to
        def initialize(tokens, source_string)
          @tokens = tokens
          @source_string = source_string
        end

        # @return [String] token texts joined with a single space; a text that
        #   contains any non-printable character is shown escaped (String#dump),
        #   everything else verbatim
        def string
          @tokens.map do |token|
            text = @source_string[token.index, token.length]
            # match? - only a yes/no answer is needed, no MatchData
            text.match?(NON_PRINTABLE) ? text.dump : text
          end.join(" ")
        end
      end
    end
  end
end

data/lib/marshal-parser/formatters/tokens/with_description.rb ADDED Viewed

@@ -0,0 +1,61 @@
+# frozen_string_literal: true
module MarshalParser
  module Formatters
    module Tokens
      # Formats a token list as a table: one line per token with its escaped
      # source bytes, a human-readable description and, when present, the
      # decoded value.
      class WithDescription
        # @param tokens [Array<Lexer::Token>] tokens in source order
        # @param source_string [String] the dump that token index/length refer to
        def initialize(tokens, source_string)
          @tokens = tokens
          @source_string = source_string
        end

        # @return [String] lines "<dumped bytes> - <description>[ (<value>)]"
        #   joined with newlines; the bytes column is left-aligned to 10 chars
        def string
          @tokens.map do |token|
            bytes = @source_string[token.index, token.length].dump
            description = self.class.token_description(token.id)
            value = token.value ? " (#{token.value})" : ""

            format("%-10s - %s%s", bytes, description, value)
          end.join("\n")
        end

        # Maps a token id (one of the Lexer constants) to its description.
        #
        # @param token [Integer] token id
        # @return [String, nil] nil when the id is not a known Lexer constant
        def self.token_description(token)
          case token
          when Lexer::VERSION                           then "Version"
          when Lexer::ARRAY_PREFIX                      then "Array beginning"
          when Lexer::OBJECT_WITH_IVARS_PREFIX          then "Special object with instance variables"
          when Lexer::OBJECT_WITH_DUMP_PREFIX           then "Object with #_dump and .load"
          when Lexer::OBJECT_WITH_MARSHAL_DUMP_PREFIX   then "Object with #marshal_dump and #marshal_load"
          when Lexer::STRING_PREFIX                     then "String beginning"
          when Lexer::HASH_PREFIX                       then "Hash beginning"
          when Lexer::HASH_WITH_DEFAULT_VALUE_PREFIX    then "Hash beginning (with default value)"
          when Lexer::REGEXP_PREFIX                     then "Regexp beginning"
          when Lexer::STRUCT_PREFIX                     then "Struct beginning"
          when Lexer::TRUE                              then "true"
          when Lexer::FALSE                             then "false"
          when Lexer::NIL                               then "nil"
          when Lexer::FLOAT_PREFIX                      then "Float beginning"
          when Lexer::INTEGER_PREFIX                    then "Integer beginning"
          when Lexer::BIG_INTEGER_PREFIX                then "Big Integer beginning"
          when Lexer::SYMBOL_PREFIX                     then "Symbol beginning"
          when Lexer::SYMBOL_LINK_PREFIX                then "Link to Symbol"
          when Lexer::CLASS_PREFIX                      then "Class beginning"
          when Lexer::MODULE_PREFIX                     then "Module beginning"
          when Lexer::OBJECT_PREFIX                     then "Object beginning"
          when Lexer::OBJECT_LINK_PREFIX                then "Link to object"
          when Lexer::OBJECT_EXTENDED_PREFIX            then "Object extended with a module"
          when Lexer::SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX then "Instance of a Core Library class subclass beginning"
          when Lexer::FLOAT                             then "Float string representation"
          when Lexer::INTEGER                           then "Integer encoded"
          when Lexer::BIG_INTEGER                       then "Big Integer encoded"
          when Lexer::STRING                            then "String characters"
          when Lexer::SYMBOL                            then "Symbol characters"
          when Lexer::PLUS_SIGN                         then "Sign '+'"
          when Lexer::MINUS_SIGN                        then "Sign '-'"
          when Lexer::UNKNOWN_SIGN                      then "Unknown sign (internal error)"
          end
        end
      end
    end
  end
end

data/lib/marshal-parser/lexer.rb ADDED Viewed

@@ -0,0 +1,331 @@
+# frozen_string_literal: true
module MarshalParser
  # Splits a Ruby Marshal dump into a flat list of tokens. Every byte of a
  # well-formed dump ends up in exactly one token, in source order.
  #
  #   lexer = MarshalParser::Lexer.new(Marshal.dump([1, "a"]))
  #   lexer.run
  #   lexer.tokens # => [#<struct Token id=VERSION ...>, ...]
  class Lexer
    # Token ids: assign values 0, 1, 2, ...
    VERSION,
      ARRAY_PREFIX,
      OBJECT_WITH_IVARS_PREFIX,
      OBJECT_WITH_DUMP_PREFIX,
      OBJECT_WITH_MARSHAL_DUMP_PREFIX,
      STRING_PREFIX,
      HASH_PREFIX,
      HASH_WITH_DEFAULT_VALUE_PREFIX,
      REGEXP_PREFIX,
      STRUCT_PREFIX,
      TRUE,
      FALSE,
      NIL,
      FLOAT_PREFIX,
      INTEGER_PREFIX,
      BIG_INTEGER_PREFIX,
      SYMBOL_PREFIX,
      SYMBOL_LINK_PREFIX,
      CLASS_PREFIX,
      MODULE_PREFIX,
      OBJECT_PREFIX,
      OBJECT_LINK_PREFIX,
      OBJECT_EXTENDED_PREFIX,
      SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX,
      FLOAT,
      INTEGER,
      BIG_INTEGER,
      STRING,
      SYMBOL,
      PLUS_SIGN,
      MINUS_SIGN,
      UNKNOWN_SIGN = (0..100).to_a

    # id     - one of the constants above
    # index  - byte offset of the token in the dump
    # length - token size in bytes
    # value  - decoded payload (version string, Integer, Float) or nil
    Token = Struct.new(:id, :index, :length, :value)

    # Type byte => [id of the prefix token, reader for what follows the prefix
    # (nil when the type byte is the whole value)].
    DISPATCH = {
      "[" => [ARRAY_PREFIX, :read_array],
      "I" => [OBJECT_WITH_IVARS_PREFIX, :read_object_with_instance_variables],
      '"' => [STRING_PREFIX, :read_string],
      "{" => [HASH_PREFIX, :read_hash],
      "}" => [HASH_WITH_DEFAULT_VALUE_PREFIX, :read_hash_with_default_value],
      "/" => [REGEXP_PREFIX, :read_regexp],
      "S" => [STRUCT_PREFIX, :read_struct],
      "T" => [TRUE, nil],
      "F" => [FALSE, nil],
      "0" => [NIL, nil],
      ":" => [SYMBOL_PREFIX, :read_symbol],
      ";" => [SYMBOL_LINK_PREFIX, :read_symbol_link],
      "f" => [FLOAT_PREFIX, :read_float],
      "i" => [INTEGER_PREFIX, :read_integer],
      "l" => [BIG_INTEGER_PREFIX, :read_big_integer],
      "c" => [CLASS_PREFIX, :read_class],
      "m" => [MODULE_PREFIX, :read_module],
      "C" => [SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX, :read_object_of_subclass_of_core_library_class],
      "o" => [OBJECT_PREFIX, :read_object],
      "@" => [OBJECT_LINK_PREFIX, :read_integer],
      "e" => [OBJECT_EXTENDED_PREFIX, :read_object_extended],
      "u" => [OBJECT_WITH_DUMP_PREFIX, :read_object_with_dump],
      "U" => [OBJECT_WITH_MARSHAL_DUMP_PREFIX, :read_object_with_marshal_dump]
    }.freeze
    private_constant :DISPATCH

    # @return [Array<Token>] tokens produced by the last #run (empty before it)
    attr_reader :tokens

    # @param source_string [String] a Marshal dump
    def initialize(source_string)
      # Token offsets and the lengths stored in a dump are byte counts, so work
      # on a binary copy: a dump tagged e.g. UTF-8 would otherwise be indexed
      # by character.
      @dump = source_string.b
      @tokens = []
    end

    # Tokenizes the dump from the start: the two version bytes, then one value.
    #
    # @return [Array<Token>] the same list #tokens returns
    # @raise [RuntimeError] on an unknown type byte or truncated input
    def run
      @index = 0
      @tokens = []
      read_version
      read
      @tokens
    end

    # @return [String] binary copy of the dump that token offsets refer to
    def source_string
      @dump
    end

    private

    # Returns the next +count+ bytes as a binary String and moves the cursor
    # past them.
    def consume(count)
      raise "Invalid length #{count} (index=#{@index})" if count.negative?

      chunk = @dump.byteslice(@index, count)
      raise "Unexpected end of input (index=#{@index})" if chunk.nil? || chunk.bytesize < count

      @index += count
      chunk
    end

    # Emits a +token_id+ token covering the next +length+ bytes.
    # The optional block converts the raw bytes into the token's value.
    def emit_span(token_id, length)
      start = @index
      bytes = consume(length)
      value = yield(bytes) if block_given?
      @tokens << Token.new(token_id, start, length, value)
    end

    # Decodes a little-endian unsigned integer.
    def little_endian(bytes)
      bytes.bytes.reverse.reduce(0) { |acc, byte| (acc << 8) | byte }
    end

    def read_version
      emit_span(VERSION, 2) { |bytes| bytes.unpack("CC").join(".") }
    end

    # Reads one complete value: its type byte plus whatever follows it.
    def read
      start = @index
      marker = consume(1)
      token_id, reader = DISPATCH.fetch(marker) do
        raise "Unexpected character #{marker.dump} (index=#{start})"
      end

      @tokens << Token.new(token_id, start, 1)
      send(reader) if reader
    end

    # Reads name/value (or key/value) pairs preceded by their count.
    def read_pairs
      read_integer.times do
        read # name / key
        read # value
      end
    end

    # Reads a length followed by that many raw bytes, emitted as +token_id+.
    def read_length_prefixed(token_id)
      emit_span(token_id, read_integer)
    end

    def read_array
      read_integer.times { read }
    end

    # Reads a packed integer and emits one INTEGER token spanning all of its
    # bytes.
    #
    # The first byte is a signed marker:
    #   0          - the value 0
    #   1..4       - that many little-endian bytes follow (positive value)
    #   -1..-4     - that many little-endian bytes follow (negative value)
    #   otherwise  - the value itself, shifted by 5 away from zero
    #
    # @return [Integer] the decoded value
    def read_integer
      start = @index
      marker = consume(1).unpack1("c")
      value =
        case marker
        when 0 then 0
        when 1..4 then little_endian(consume(marker))
        when -4..-1 then little_endian(consume(-marker)) - (1 << (8 * -marker))
        else marker.positive? ? marker - 5 : marker + 5
        end

      @tokens << Token.new(INTEGER, start, @index - start, value)
      value
    end

    def read_big_integer
      sign = read_sign
      length = read_integer * 2 # the stored length counts 16-bit words
      emit_span(BIG_INTEGER, length) do |bytes|
        magnitude = little_endian(bytes)
        sign.id == MINUS_SIGN ? -magnitude : magnitude
      end
    end

    def read_sign
      start = @index
      token_id =
        case consume(1)
        when "+" then PLUS_SIGN
        when "-" then MINUS_SIGN
        else UNKNOWN_SIGN
        end

      token = Token.new(token_id, start, 1)
      @tokens << token
      token
    end

    def read_object_with_instance_variables
      read # read the object itself
      read_pairs # read instance variables
    end

    def read_string
      read_length_prefixed(STRING)
    end

    def read_symbol
      read_length_prefixed(SYMBOL)
    end

    def read_symbol_link
      read_integer
    end

    def read_hash
      read_pairs
    end

    def read_hash_with_default_value
      read_pairs
      read # read default value - any object
    end

    def read_regexp
      read_string # read Regexp's source
      # The options are stored as one raw signed byte, not as a packed integer
      # (decoding e.g. 1 as a packed integer would swallow the following byte).
      # It is still emitted as an INTEGER token.
      emit_span(INTEGER, 1) { |byte| byte.unpack1("c") }
    end

    def read_struct
      read # read symbol (class name)
      read_pairs # read members: symbol (name) + object (value)
    end

    def read_float
      emit_span(FLOAT, read_integer) { |text| parse_float(text) }
    end

    # Marshal writes the special values as "inf", "-inf" and "nan", which
    # Kernel#Float does not accept.
    def parse_float(text)
      case text
      when "inf" then Float::INFINITY
      when "-inf" then -Float::INFINITY
      when "nan" then Float::NAN
      else Float(text)
      end
    end

    def read_class
      read_length_prefixed(STRING)
    end

    def read_module
      read_length_prefixed(STRING)
    end

    def read_object_of_subclass_of_core_library_class
      read # read symbol (class name)
      read # read object
    end

    def read_object
      read # read symbol (class name)
      read_pairs # read instance variables
    end

    def read_object_extended
      read # read symbol (module name)
      read # read object itself
    end

    def read_object_with_dump
      read # read symbol (class name)
      read_string # read dumped string
    end

    def read_object_with_marshal_dump
      read # read symbol (class name)
      read # read object (what #marshal_dump returned)
    end
  end
end