heap-profiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'lib/heap_profiler/version'
3
+
4
# Gem specification for heap-profiler: metadata, packaged files, executables
# and the native extension to build at install time.
Gem::Specification.new do |spec|
  spec.name = "heap-profiler"
  # The gem's code is namespaced under `HeapProfiler` (not `Heap::Profiler`),
  # and the version file is required from lib/heap_profiler/version above.
  # NOTE(review): assumes that file defines HeapProfiler::VERSION — confirm.
  spec.version = HeapProfiler::VERSION
  spec.authors = ["Jean Boussier"]
  spec.email = ["jean.boussier@gmail.com"]

  spec.summary = 'Ruby heap profiling tool'
  spec.description = 'Make several heap dumps and summarize allocated, retained memory'
  spec.homepage = "https://github.com/Shopify/heap-profiler"
  spec.license = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")

  spec.metadata["allowed_push_host"] = "https://rubygems.org/"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Test directories are excluded from the package.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    %x(git ls-files -z).split("\x0").reject { |f| f.match?(%r{^(test|spec|features)/}) }
  end
  spec.bindir = "exe"
  # Every file under exe/ is exposed as an executable, by basename.
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.extensions << "ext/heap_profiler/extconf.rb"
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is the gem entrypoint loaded by bundler.
4
+ # We only load what's needed to take heap dumps,
5
+ # not the code required to analyse them.
6
+ require "heap_profiler/runtime"
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Walks every object of a heap dump once and feeds it to a set of
# "dimensions" (totals, per-group breakdowns, duplicated strings).
class Analyzer
  # Accumulates a single metric over all processed objects.
  class Dimension
    # Metric name => lambda returning the contribution of one object.
    METRICS = {
      "objects" => -> (_object) { 1 },
      "memory" => -> (object) { object[:memsize].to_i },
    }.freeze

    attr_reader :stats

    # metric - a key of METRICS; an unknown key raises KeyError.
    def initialize(metric)
      @stats = 0
      @metric = METRICS.fetch(metric)
    end

    # Adds the object's contribution to the running total.
    def process(_index, object)
      @stats += @metric.call(object)
    end

    # Intentionally empty: a scalar total has nothing to sort.
    def sort!
    end
  end

  # Accumulates a metric per group (file, location, gem or class).
  class GroupedDimension < Dimension
    # Grouping name => lambda returning the group key of an object,
    # or nil when the object cannot be attributed to a group.
    GROUPINGS = {
      "file" => -> (_index, object) { object[:file] },
      "location" => -> (_index, object) do
        if (file = object[:file]) && (line = object[:line])
          "#{file}:#{line}"
        end
      end,
      "gem" => -> (index, object) { index.guess_gem(object[:file]) },
      "class" => -> (index, object) { index.guess_class(object) },
    }.freeze

    attr_reader :stats

    # metric   - a key of Dimension::METRICS.
    # grouping - a key of GROUPINGS; an unknown key raises KeyError.
    def initialize(metric, grouping)
      super(metric)
      @grouping = GROUPINGS.fetch(grouping)
      @stats = Hash.new { |h, k| h[k] = 0 }
    end

    # Objects whose group resolves to nil/false are skipped.
    def process(index, object)
      if (group = @grouping.call(index, object))
        @stats[group] += @metric.call(object)
      end
    end

    # Returns at most `max` [group, value] pairs, highest value first;
    # ties are broken by group key in descending order.
    def top_n(max)
      stats.sort do |a, b|
        cmp = b[1] <=> a[1]
        cmp == 0 ? b[0] <=> a[0] : cmp
      end.take(max)
    end
  end

  # Groups STRING objects by value to surface duplicated strings.
  class StringDimension
    # Count and memory of one string value at one "file:line" location.
    class StringLocation
      attr_reader :location, :count, :memsize

      def initialize(location)
        @location = location
        @count = 0
        @memsize = 0
      end

      def process(object)
        @count += 1
        # to_i mirrors the "memory" metric: a missing memsize counts as 0
        # instead of raising on `Integer + nil`.
        @memsize += object[:memsize].to_i
      end
    end

    # Count, memory and per-location breakdown of one string value.
    class StringGroup
      attr_reader :value, :count, :memsize, :locations
      def initialize(value) # TODO: should we consider encoding?
        @value = value
        @locations_counts = Hash.new { |h, k| h[k] = StringLocation.new(k) }
        @count = 0
        @memsize = 0
      end

      def process(object)
        @count += 1
        # See StringLocation#process for why to_i is used.
        @memsize += object[:memsize].to_i
        # Objects without both file and line only count toward the totals.
        if (file = object[:file]) && (line = object[:line])
          @locations_counts["#{file}:#{line}"].process(object)
        end
      end

      # Returns at most `max` StringLocation entries, most frequent first;
      # ties are broken by location in descending order.
      def top_n(max)
        values = @locations_counts.values
        values.sort! do |a, b|
          cmp = b.count <=> a.count
          cmp == 0 ? b.location <=> a.location : cmp
        end
        values.take(max)
      end
    end

    attr_reader :stats

    def initialize
      @stats = Hash.new { |h, k| h[k] = StringGroup.new(k) }
    end

    # Only objects of type "STRING" that carry a :value are recorded.
    def process(_index, object)
      return unless object[:type] == "STRING"
      value = object[:value]
      return unless value # broken strings etc
      @stats[value].process(object)
    end

    # Returns at most `max` StringGroup entries, most frequent first;
    # ties are broken by string value in descending order.
    def top_n(max)
      values = @stats.values
      values.sort! do |a, b|
        cmp = b.count <=> a.count
        cmp == 0 ? b.value <=> a.value : cmp
      end
      values.take(max)
    end
  end

  # heap  - object responding to #each_object, yielding one object at a time.
  # index - object passed to the groupings (needs #guess_gem / #guess_class
  #         when the "gem" / "class" groupings are requested).
  def initialize(heap, index)
    @heap = heap
    @index = index
  end

  # Builds one dimension per requested metric (plus one per metric/grouping
  # pair), then feeds every heap object to all of them in a single pass.
  #
  # metrics   - list of metric names; "strings" selects the StringDimension.
  # groupings - list of GroupedDimension::GROUPINGS keys.
  #
  # Returns a Hash of dimension name => dimension, e.g. "total_memory",
  # "objects_by_file", "strings".
  def run(metrics, groupings)
    dimensions = {}
    metrics.each do |metric|
      if metric == "strings"
        dimensions["strings"] = StringDimension.new
      else
        dimensions["total_#{metric}"] = Dimension.new(metric)
        groupings.each do |grouping|
          dimensions["#{metric}_by_#{grouping}"] = GroupedDimension.new(metric, grouping)
        end
      end
    end

    processors = dimensions.values
    @heap.each_object do |object|
      processors.each { |p| p.process(@index, object) }
    end
    dimensions
  end
end
147
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Command line front-end: prints a report for the single path given in argv.
class CLI
  def initialize(argv)
    @argv = argv
  end

  # Returns the exit status: 0 once a report was printed, 1 when the
  # arguments are invalid (exactly one path is expected) and usage was shown.
  def run
    unless @argv.size == 1
      print_usage
      return 1
    end

    print_report(@argv.first)
    0
  end

  # A directory is reported through DiffResults, anything else through
  # HeapResults.
  def print_report(path)
    results_class = File.directory?(path) ? DiffResults : HeapResults
    results_class.new(path).pretty_print(scale_bytes: true, normalize_paths: true)
  end

  def print_usage
    puts "Usage: #{$PROGRAM_NAME} directory_or_heap_dump"
  end
end
32
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Gives access to the dumps stored in a report directory and lazily builds
# the "allocated-diff" / "retained-diff" dumps derived from them.
class Diff
  attr_reader :allocated

  # report_directory - directory holding the *.heap dumps and a
  #                    `generation.info` file containing an integer.
  def initialize(report_directory)
    @report_directory = report_directory
    @allocated = open_dump('allocated')
    generation_file = File.join(report_directory, 'generation.info')
    @generation = Integer(File.read(generation_file))
  end

  # Memoized dump named "allocated-diff", built from the allocated dump.
  def allocated_diff
    @allocated_diff ||= build_diff('allocated-diff', @allocated)
  end

  # Memoized dump named "retained-diff", built from the "retained" dump.
  def retained_diff
    @retained_diff ||= begin
      retained = open_dump('retained')
      build_diff('retained-diff', retained)
    end
  end

  private

  # Opens the dump called `name`; when its file does not exist yet, asks
  # `base` to filter itself into that file, passing the recorded generation.
  def build_diff(name, base)
    open_dump(name).tap do |diff|
      next if diff.exist?

      target = File.join(@report_directory, "#{name}.heap")
      base.filter(target, since: @generation)
    end
  end

  def open_dump(name)
    Dump.open(@report_directory, name)
  end
end
35
+ end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Handle on a heap dump file stored at `path`.
class Dump
  # Running object count and cumulated memsize.
  class Stats
    attr_accessor :count, :memsize

    def initialize
      @count = 0
      @memsize = 0
    end

    # Objects without a :memsize key contribute 0 bytes.
    def process(object)
      @count += 1
      @memsize += object.fetch(:memsize, 0)
    end
  end

  # Stats for a whole dump, plus a breakdown per object :type.
  class GlobalStats < Stats
    class << self
      # Builds the stats by iterating over `dump.each_object`.
      def from(dump)
        stats = new
        dump.each_object do |object|
          stats.process(object)
        end
        stats
      end
    end

    def process(object)
      super
      per_type[object[:type]].process(object)
    end

    # Hash of object type => Stats, created on first access.
    # It must be memoized: rebuilding the Hash on every call would throw
    # away everything recorded by #process.
    def per_type
      @per_type ||= Hash.new { |h, k| h[k] = Stats.new }
    end
  end

  class << self
    # Returns the dump for "<dir>/<name>.heap".
    def open(dir, name)
      new(File.join(dir, "#{name}.heap"))
    end
  end

  attr_reader :path

  def initialize(path)
    @path = path
  end

  # ObjectSpace.dump_all itself allocates objects.
  #
  # Before 2.7 it will allocate one String per class to get its name.
  # After 2.7, it only allocates a couple of hashes, a file etc.
  #
  # Either way we need to exclude them from the reports.
  #
  # Appends to `file` every line of this dump whose address is not part of
  # `other`'s address index.
  def diff(other, file)
    each_line_with_address do |line, address|
      file << line unless other.index.include?(address)
    end
  end

  # Delegates to Native.filter_heap to write a filtered copy of this dump
  # to the path `as`.
  def filter(as, since:)
    Native.filter_heap(path, as, since: since)
  end

  # Yields the objects of the dump, as loaded by Native.load_many.
  def each_object(&block)
    Native.load_many(path, batch_size: 10_000_000, &block)
  end

  def stats
    @stats ||= GlobalStats.from(self)
  end

  # Number of lines in the dump file (memoized).
  def size
    # File.foreach closes the file once the enumeration is over, unlike a
    # bare File.open which would leak the descriptor.
    @size ||= File.foreach(path).count
  end

  def index
    @index ||= Native.addresses_set(path)
  end

  # Yields each line of the dump file along with the address parsed from it.
  def each_line_with_address
    File.foreach(path) do |line|
      # This is a cheap, hacky extraction of addresses.
      # So far it seems to work on 2.7.1 but that might not hold true on all versions.
      # Also the root objects don't have an address, but that's fine
      yield line, line.byteslice(14, 12).to_i(16)
    end
  end

  def exist?
    File.exist?(@path)
  end

  # Returns self when the dump file exists, nil otherwise.
  def presence
    exist? ? self : nil
  end
end
101
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "heap_profiler/runtime"
4
+ require "heap_profiler/native"
5
+ require "heap_profiler/dump"
6
+ require "heap_profiler/index"
7
+ require "heap_profiler/diff"
8
+ require "heap_profiler/analyzer"
9
+ require "heap_profiler/polychrome"
10
+ require "heap_profiler/monochrome"
11
+ require "heap_profiler/results"
12
+ require "heap_profiler/cli"
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Lookup tables built from a heap dump, used to attribute objects to a
# class name, a string value or a gem.
class Index
  # heap - object exposing the dump #path; the index is built immediately.
  def initialize(heap)
    @heap = heap
    @classes = {}
    @strings = {}
    @gems = {}
    build!
  end

  # (Re)loads the class and string tables through Native.build_index.
  # Returns self.
  def build!
    @classes, @strings = Native.build_index(@heap.path)
    self
  end

  # Heap dump :type => display name, for types that map to a fixed name.
  BUILTIN_CLASSES = {
    "FILE" => "File",
    "ICLASS" => "ICLASS",
    "COMPLEX" => "Complex",
    "RATIONAL" => "Rational",
    "BIGNUM" => "Bignum",
    "FLOAT" => "Float",
    "ARRAY" => "Array",
    "STRING" => "String",
    "HASH" => "Hash",
    "SYMBOL" => "Symbol",
    "MODULE" => "Module",
    "CLASS" => "Class",
    "REGEXP" => "Regexp",
    "MATCH" => "MatchData",
    "ROOT" => "<VM Root>",
  }.freeze

  # Memoizing hashes: the label for a given key is built once, then reused.
  IMEMO_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (IMEMO)" }
  DATA_TYPES = Hash.new { |h, k| h[k] = "<#{(k || 'unknown')}> (DATA)" }

  # Returns a display name for the class of `object`, or nil when an
  # OBJECT/STRUCT has no :class or its class address is not in the index
  # (the latter also prints a warning on stderr).
  # Raises RuntimeError for a :type it does not know about.
  def guess_class(object)
    type = object[:type]
    if (class_name = BUILTIN_CLASSES[type])
      return class_name
    end

    return IMEMO_TYPES[object[:imemo_type]] if type == 'IMEMO'
    return DATA_TYPES[object[:struct]] if type == 'DATA'

    if type == "OBJECT" || type == "STRUCT"
      class_address = object[:class]
      return unless class_address

      return @classes.fetch(class_address) do
        $stderr.puts("WARNING: Couldn't infer class name of: #{object.inspect}")
        nil
      end
    end

    raise "[BUG] Couldn't infer type of #{object.inspect}"
  end

  # Returns the object's own :value, or for a :shared string the value
  # indexed under its first reference. Returns nil when neither is available.
  def string_value(object)
    value = object[:value]
    return value if value
    return unless object[:shared]

    # A shared string without any reference cannot be resolved; return nil
    # rather than raising on `nil.first`.
    reference = object[:references]&.first
    @strings[cast_address(reference)] if reference
  end

  # Parses an address (e.g. "0x7f...") as a base-16 integer.
  def cast_address(address)
    address.to_s.to_i(16)
  end

  # Best-effort attribution of a source path to a gem name, "rubygems",
  # a stdlib library, an application directory, or "other". Memoized per path.
  def guess_gem(path)
    @gems[path] ||=
      if %r{(/gems/.*)*/gems/(?<gemname>[^/]+)} =~ path
        gemname
      elsif %r{/rubygems[\./]}.match?(path)
        "rubygems"
      # Matches any Ruby major version (ruby/2.7.0/, ruby/3.1.0/, ...), so
      # stdlib files of Ruby 3+ are not misreported as an application path.
      elsif %r{ruby/\d+\.[^/]+/(?<stdlib>[^/\.]+)} =~ path
        stdlib
      elsif %r{(?<app>[^/]+/(bin|app|lib))} =~ path
        app
      else
        "other"
      end
  end
end
89
+ end