RubyGems - heap-profiler - Versions diffs - 0.8.0.rc1-x86_64-linux - Mend

heap-profiler 0.8.0.rc1-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +7 -0
data/.github/workflows/cibuildgem.yaml +87 -0
data/.github/workflows/tests.yml +33 -0
data/.gitignore +11 -0
data/.rubocop.yml +29 -0
data/.ruby-version +1 -0
data/Gemfile +13 -0
data/Gemfile.lock +69 -0
data/LICENSE.txt +21 -0
data/README.md +291 -0
data/Rakefile +17 -0
data/TODO.md +3 -0
data/benchmark/address-parsing.rb +15 -0
data/benchmark/indexing.rb +17 -0
data/bin/console +15 -0
data/bin/generate-report +49 -0
data/bin/rubocop +29 -0
data/bin/setup +8 -0
data/bin/testunit +9 -0
data/dev.yml +20 -0
data/exe/heap-profiler +5 -0
data/ext/heap_profiler/extconf.rb +9 -0
data/ext/heap_profiler/heap_profiler.cpp +335 -0
data/ext/heap_profiler/simdjson.cpp +15047 -0
data/ext/heap_profiler/simdjson.h +32071 -0
data/heap-profiler.gemspec +31 -0
data/lib/heap-profiler.rb +6 -0
data/lib/heap_profiler/3.1/heap_profiler.so +0 -0
data/lib/heap_profiler/3.2/heap_profiler.so +0 -0
data/lib/heap_profiler/3.3/heap_profiler.so +0 -0
data/lib/heap_profiler/3.4/heap_profiler.so +0 -0
data/lib/heap_profiler/4.0/heap_profiler.so +0 -0
data/lib/heap_profiler/analyzer.rb +232 -0
data/lib/heap_profiler/cli.rb +140 -0
data/lib/heap_profiler/diff.rb +39 -0
data/lib/heap_profiler/dump.rb +97 -0
data/lib/heap_profiler/full.rb +12 -0
data/lib/heap_profiler/index.rb +89 -0
data/lib/heap_profiler/monochrome.rb +19 -0
data/lib/heap_profiler/parser.rb +83 -0
data/lib/heap_profiler/polychrome.rb +93 -0
data/lib/heap_profiler/reporter.rb +118 -0
data/lib/heap_profiler/results.rb +256 -0
data/lib/heap_profiler/runtime.rb +30 -0
data/lib/heap_profiler/version.rb +4 -0
metadata +94 -0

data/heap-profiler.gemspec ADDED Viewed

@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+require_relative 'lib/heap_profiler/version'
+Gem::Specification.new do |spec|
+  spec.name          = "heap-profiler"
+  spec.version       = HeapProfiler::VERSION
+  spec.authors       = ["Jean Boussier"]
+  spec.email         = ["jean.boussier@gmail.com"]
+  spec.summary       = 'Ruby heap profiling tool'
+  spec.description   = 'Make several heap dumps and summarize allocated, retained memory'
+  spec.homepage      = "https://github.com/Shopify/heap-profiler"
+  spec.license       = "MIT"
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
+  spec.metadata["allowed_push_host"] = "https://rubygems.org/"
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = spec.homepage
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
+    %x(git ls-files -z).split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  end
+  spec.bindir        = "exe"
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  spec.extensions << "ext/heap_profiler/extconf.rb"
+end

data/lib/heap-profiler.rb ADDED Viewed

@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+# This file is the gem entrypoint loaded by bundler.
+# We only load what's needed to take heap dumps,
+# not the code required to analyse them.
+require "heap_profiler/runtime"

data/lib/heap_profiler/3.1/heap_profiler.so ADDED Viewed

Binary file

data/lib/heap_profiler/3.2/heap_profiler.so ADDED Viewed

Binary file

data/lib/heap_profiler/3.3/heap_profiler.so ADDED Viewed

Binary file

data/lib/heap_profiler/3.4/heap_profiler.so ADDED Viewed

Binary file

data/lib/heap_profiler/4.0/heap_profiler.so ADDED Viewed

Binary file

data/lib/heap_profiler/analyzer.rb ADDED Viewed

@@ -0,0 +1,232 @@
+# frozen_string_literal: true
+module HeapProfiler
+  class Analyzer
+    class Dimension
+      attr_reader :objects, :memory
+      def initialize
+        @objects = 0
+        @memory = 0
+      end
+      def process(_index, object)
+        @objects += 1
+        @memory += object[:memsize]
+      end
+      def stats(metric)
+        case metric
+        when "objects"
+          objects
+        when "memory"
+          memory
+        else
+          raise "Invalid metric: #{metric.inspect}"
+        end
+      end
+    end
+    class GroupedDimension < Dimension
+      class << self
+        def build(grouping)
+          klass = case grouping
+          when "file"
+            FileGroupDimension
+          when "location"
+            LocationGroupDimension
+          when "gem"
+            GemGroupDimension
+          when "class"
+            ClassGroupDimension
+          else
+            raise "Unknown grouping key: #{grouping.inspect}"
+          end
+          klass.new
+        end
+      end
+      def initialize
+        @objects = Hash.new { |h, k| h[k] = 0 }
+        @memory = Hash.new { |h, k| h[k] = 0 }
+      end
+      def process(index, object)
+        if (group = @grouping.call(index, object))
+          @objects[group] += 1
+          @memory[group] += object[:memsize]
+        end
+      end
+      def top_n(metric, max)
+        values = stats(metric).sort do |a, b|
+          b[1] <=> a[1]
+        end
+        top = values.take(max)
+        top.sort! do |a, b|
+          cmp = b[1] <=> a[1]
+          cmp == 0 ? b[0] <=> a[0] : cmp
+        end
+        top
+      end
+    end
+    class FileGroupDimension < GroupedDimension
+      def process(_index, object)
+        if (group = object[:file])
+          @objects[group] += 1
+          @memory[group] += object[:memsize]
+        end
+      end
+    end
+    class LocationGroupDimension < GroupedDimension
+      def process(_index, object)
+        file = object[:file]
+        line = object[:line]
+        if file && line
+          group = "#{file}:#{line}"
+          @objects[group] += 1
+          @memory[group] += object[:memsize]
+        end
+      end
+    end
+    class GemGroupDimension < GroupedDimension
+      def process(index, object)
+        if (group = index.guess_gem(object))
+          @objects[group] += 1
+          @memory[group] += object[:memsize]
+        end
+      end
+    end
+    class ClassGroupDimension < GroupedDimension
+      def process(index, object)
+        if (group = index.guess_class(object))
+          @objects[group] += 1
+          @memory[group] += object[:memsize]
+        end
+      end
+    end
+    class StringDimension
+      class StringLocation
+        attr_reader :location, :count, :memsize
+        def initialize(location)
+          @location = location
+          @count = 0
+          @memsize = 0
+        end
+        def process(object)
+          @count += 1
+          @memsize += object[:memsize]
+        end
+      end
+      class StringGroup
+        attr_reader :value, :count, :memsize, :locations
+        def initialize(value) # TODO: should we consider encoding?
+          @value = value
+          @locations_counts = Hash.new { |h, k| h[k] = StringLocation.new(k) }
+          @count = 0
+          @memsize = 0
+        end
+        def process(object)
+          @count += 1
+          @memsize += object[:memsize]
+          if (file = object[:file]) && (line = object[:line])
+            @locations_counts["#{file}:#{line}"].process(object)
+          end
+        end
+        def top_n(max)
+          values = @locations_counts.values
+          values.sort! do |a, b|
+            cmp = b.count <=> a.count
+            cmp == 0 ? b.location <=> a.location : cmp
+          end
+          values.take(max)
+        end
+      end
+      attr_reader :stats
+      def initialize
+        @stats = Hash.new { |h, k| h[k] = StringGroup.new(k) }
+      end
+      def process(_index, object)
+        return unless object[:type] == :STRING
+        value = object[:value]
+        return unless value # broken strings etc
+        @stats[value].process(object)
+      end
+      def top_n(max)
+        values = @stats.values
+        values.sort! do |a, b|
+          b.count <=> a.count
+        end
+        top = values.take(max)
+        top.sort! do |a, b|
+          cmp = b.count <=> a.count
+          cmp == 0 ? b.value <=> a.value : cmp
+        end
+        top
+      end
+    end
+    class ShapeEdgeDimension
+      def initialize
+        @stats = Hash.new(0)
+      end
+      def process(_index, object)
+        if name = object[:edge_name]
+          @stats[name] += 1
+        end
+      end
+      def top_n(max)
+        @stats.sort do |(a_name, a_count), (b_name, b_count)|
+          cmp = b_count <=> a_count
+          if cmp == 0
+            a_name <=> b_name
+          else
+            cmp
+          end
+        end.take(max)
+      end
+    end
+    def initialize(heap, index)
+      @heap = heap
+      @index = index
+    end
+    def run(metrics, groupings)
+      dimensions = {}
+      metrics.each do |metric|
+        if metric == "strings"
+          dimensions["strings"] = StringDimension.new
+        elsif metric == "shape_edges"
+          dimensions["shape_edges"] = ShapeEdgeDimension.new
+        else
+          dimensions["total"] = Dimension.new
+          groupings.each do |grouping|
+            dimensions[grouping] = GroupedDimension.build(grouping)
+          end
+        end
+      end
+      processors = dimensions.values
+      @heap.each_object do |object|
+        processors.each { |p| p.process(@index, object) }
+      end
+      dimensions
+    end
+  end
+end

data/lib/heap_profiler/cli.rb ADDED Viewed

@@ -0,0 +1,140 @@
+# frozen_string_literal: true
+require 'optparse'
+module HeapProfiler
+  class CLI
+    def initialize(argv)
+      @argv = argv
+    end
+    def run
+      parser.parse!(@argv)
+      begin
+        case @argv.first
+        when "clean"
+          clean_dump(@argv[1])
+          return 0
+        when "report"
+          print_report(@argv[1])
+          return 0
+        else
+          if @argv.size == 1
+            print_report(@argv.first)
+            return 0
+          end
+        end
+      rescue CapacityError => error
+        STDERR.puts(error.message)
+        STDERR.puts("Current size: #{Parser.batch_size}B")
+        STDERR.puts("Try increasing it with --batch-size")
+        STDERR.puts
+      end
+      print_usage
+      1
+    end
+    def print_report(path)
+      results = if File.directory?(path)
+        if @retained_only
+          DiffResults.new(path, ["retained"])
+        else
+          DiffResults.new(path)
+        end
+      else
+        HeapResults.new(path)
+      end
+      results.pretty_print(scale_bytes: true, normalize_paths: true)
+    end
+    def clean_dump(path)
+      require "json"
+      errors = index = 0
+      clean_path = "#{path}.clean"
+      File.open(clean_path, "w+") do |output|
+        File.open(path) do |input|
+          input.each_line do |line|
+            begin
+              JSON.parse(line)
+            rescue JSON::ParserError
+              errors += 1
+              $stderr.puts("Invalid JSON found on line #{index}. Skipping")
+            else
+              output.print(line)
+            end
+            index += 1
+          end
+        end
+      end
+      $stderr.puts("Processed #{index} lines, removed #{errors} invalid lines")
+      $stderr.puts("Clean dump available at #{clean_path}")
+    end
+    def print_usage
+      puts "Usage: #{$PROGRAM_NAME} directory_or_heap_dump"
+      puts @parser.help
+    end
+    SIZE_UNITS = {
+      'B' => 1,
+      'K' => 1_000,
+      'M' => 1_000_000,
+      'G' => 1_000_000_000,
+    }
+    def parse_byte_size(size_string)
+      if (match = size_string.match(/\A(\d+)(\w)?B?\z/i))
+        digits = Float(match[1])
+        base = 1
+        unit = match[2]&.upcase
+        if unit
+          base = SIZE_UNITS.fetch(unit) { raise ArgumentError, "Unknown size unit: #{unit}" }
+        end
+        size = (digits * base).to_i
+        if size > 4_000_000_000
+          raise ArgumentError, "Batch size can't be bigger than 4G"
+        end
+        size
+      else
+        raise ArgumentError, "#{size_string} is not a valid size"
+      end
+    end
+    private
+    def parser
+      @parser ||= OptionParser.new do |opts|
+        opts.banner = <<~EOS
+          Usage: heap-profiler [SUBCOMMAND] [ARGS]"
+          SUBCOMMANDS
+            report: Produce a full memory report from the provided dump. (default)
+            clean: Remove all malformed lines from the provided heap dump. Can be useful to workaround some ruby bugs.
+          GLOBAL OPTIONS
+        EOS
+        opts.separator ""
+        opts.on('-r', '--retained-only', 'Only compute report for memory retentions.') do
+          @retained_only = true
+        end
+        HeapProfiler::AbstractResults.top_entries_count = 50
+        opts.on("-m", "--max=NUM", Integer, "Max number of entries to output. (Defaults to 50)") do |arg|
+          HeapProfiler::AbstractResults.top_entries_count = arg
+        end
+        help = <<~EOS.lines.join(" ")
+          Sets the simdjson parser batch size. It must be larger than the largest JSON document in the heap dump, and defaults to 10MB.
+        EOS
+        opts.on('--batch-size SIZE', help.strip) do |size_string|
+          HeapProfiler::Parser.batch_size = parse_byte_size(size_string)
+        rescue ArgumentError => error
+          STDERR.puts "Invalid batch-size: #{error.message}"
+          exit 1
+        end
+      end
+    end
+  end
+end

data/lib/heap_profiler/diff.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module HeapProfiler
+  class Diff
+    class DumpSubset
+      def initialize(path, generation)
+        @path = path
+        @generation = generation
+      end
+      def each_object(&block)
+        Parser.load_many(@path, since: @generation, &block)
+      end
+    end
+    attr_reader :allocated
+    def initialize(report_directory)
+      @report_directory = report_directory
+      @allocated = open_dump('allocated')
+      @generation = Integer(File.read(File.join(report_directory, 'generation.info')))
+      @generation = nil if @generation == 0
+    end
+    def allocated_diff
+      @allocated_diff ||= DumpSubset.new(@allocated.path, @generation)
+    end
+    def retained_diff
+      @retained_diff ||= DumpSubset.new(open_dump('retained').path, @generation)
+    end
+    private
+    def open_dump(name)
+      Dump.open(@report_directory, name)
+    end
+  end
+end

data/lib/heap_profiler/dump.rb ADDED Viewed

@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+module HeapProfiler
+  class Dump
+    class Stats
+      attr_accessor :count, :memsize
+      def process(object)
+        @count += 1
+        @memsize += object.fetch(:memsize, 0)
+      end
+      def initialize
+        @count = 0
+        @memsize = 0
+      end
+    end
+    class GlobalStats < Stats
+      class << self
+        def from(dump)
+          stats = new
+          dump.each_object do |object|
+            stats.process(object)
+          end
+          stats
+        end
+      end
+      def process(object)
+        super
+        per_type[object[:type]].process(object)
+      end
+      def per_type
+        @per_type = Hash.new { |h, k| h[k] = Stats.new }
+      end
+    end
+    class << self
+      def open(dir, name)
+        Dump.new(File.join(dir, "#{name}.heap"))
+      end
+    end
+    attr_reader :path
+    def initialize(path)
+      @path = path
+    end
+    # ObjectSpace.dump_all itself allocate objects.
+    #
+    # Before 2.7 it will allocate one String per class to get its name.
+    # After 2.7, it only allocate a couple hashes, a file etc.
+    #
+    # Either way we need to exclude them from the reports
+    def diff(other, file)
+      each_line_with_address do |line, address|
+        file << line unless other.index.include?(address)
+      end
+    end
+    def each_object(since: nil, &block)
+      Parser.load_many(path, since: since, &block)
+    end
+    def stats
+      @stats ||= GlobalStats.from(self)
+    end
+    def size
+      @size ||= File.open(path).each_line.count
+    end
+    def index
+      @index ||= Native.addresses_set(path)
+    end
+    def each_line_with_address
+      File.open(path).each_line do |line|
+        # This is a cheap, hacky extraction of addresses.
+        # So far it seems to work on 2.7.1 but that might not hold true on all versions.
+        # Also the root objects don't have an address, but that's fine
+        yield line, line.byteslice(14, 12).to_i(16)
+      end
+    end
+    def exist?
+      File.exist?(@path)
+    end
+    def presence
+      exist? ? self : nil
+    end
+  end
+end

data/lib/heap_profiler/full.rb ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+require "heap_profiler/runtime"
+require "heap_profiler/parser"
+require "heap_profiler/dump"
+require "heap_profiler/index"
+require "heap_profiler/diff"
+require "heap_profiler/analyzer"
+require "heap_profiler/polychrome"
+require "heap_profiler/monochrome"
+require "heap_profiler/results"
+require "heap_profiler/cli"

data/lib/heap_profiler/index.rb ADDED Viewed

@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+module HeapProfiler
+  class Index
+    def initialize(heap)
+      @heap = heap
+      @classes = {}
+      @strings = {}
+      @gems = {}
+      build!
+    end
+    def build!
+      @classes, @strings = Parser.build_index(@heap.path)
+      self
+    end
+    BUILTIN_CLASSES = {
+      FILE: "File",
+      ICLASS: "ICLASS",
+      COMPLEX: "Complex",
+      RATIONAL: "Rational",
+      BIGNUM: "Bignum",
+      FLOAT: "Float",
+      ARRAY: "Array",
+      STRING: "String",
+      HASH: "Hash",
+      SYMBOL: "Symbol",
+      MODULE: "Module",
+      CLASS: "Class",
+      REGEXP: "Regexp",
+      MATCH: "MatchData",
+      ROOT: "<VM Root>",
+      SHAPE: "SHAPE",
+    }.freeze
+    IMEMO_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (IMEMO)" }
+    DATA_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (DATA)" }
+    def guess_class(object)
+      type = object[:type]
+      if (class_name = BUILTIN_CLASSES[type])
+        return class_name
+      end
+      return IMEMO_TYPES[object[:imemo_type]] if type == :IMEMO
+      class_name = if (class_address = object[:class])
+        @classes.fetch(class_address) do
+          return DATA_TYPES[object[:struct]] if type == :DATA
+          $stderr.puts("WARNING: Couldn't infer class name of: #{object.inspect}")
+          nil
+        end
+      end
+      if type == :DATA && (class_name.nil? || class_name == "Object")
+        DATA_TYPES[object[:struct]]
+      else
+        class_name
+      end
+    end
+    def string_value(object)
+      value = object[:value]
+      return value if value
+      if object[:shared]
+        @strings[Native.parse_address(object[:references].first)]
+      end
+    end
+    def guess_gem(object)
+      path = object[:file]
+      @gems[path] ||=
+        if %r{(/gems/.*)*/gems/(?<gemname>[^/]+)} =~ path
+          gemname
+        elsif %r{/rubygems[\./]}.match?(path)
+          "rubygems"
+        elsif %r{ruby/2\.[^/]+/(?<stdlib>[^/\.]+)} =~ path
+          stdlib
+        elsif %r{(?<app>[^/]+/(bin|app|lib))} =~ path
+          app
+        else
+          "other"
+        end
+    end
+  end
+end

data/lib/heap_profiler/monochrome.rb ADDED Viewed

@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+module HeapProfiler
+  module Monochrome
+    class << self
+      def path(text)
+        text
+      end
+      def string(text)
+        text
+      end
+      def line(text)
+        text
+      end
+    end
+  end
+end