heap-profiler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'lib/heap_profiler/version'
3
+
4
# Gem specification for heap-profiler: package metadata, the list of files to
# ship, and the native extension to build at install time.
Gem::Specification.new do |spec|
  spec.name          = "heap-profiler"
  # NOTE(review): every module visible in this package is named `HeapProfiler`;
  # confirm that lib/heap_profiler/version.rb really defines `Heap::Profiler::VERSION`.
  spec.version       = Heap::Profiler::VERSION
  spec.authors       = ["Jean Boussier"]
  spec.email         = ["jean.boussier@gmail.com"]

  spec.summary       = 'Ruby heap profiling tool'
  spec.description   = 'Make several heap dumps and summarize allocated, retained memory'
  spec.homepage      = "https://github.com/Shopify/heap-profiler"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")

  spec.metadata["allowed_push_host"] = "https://rubygems.org/"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git (NUL separated); test
  # directories are excluded from the package.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    %x(git ls-files -z).split("\x0").reject { |f| f.match?(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Native extension compiled when the gem is installed.
  spec.extensions << "ext/heap_profiler/extconf.rb"
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is the gem entrypoint loaded by bundler.
4
+ # We only load what's needed to take heap dumps,
5
+ # not the code required to analyse them.
6
+ require "heap_profiler/runtime"
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
module HeapProfiler
  # Walks every object of a heap dump once and feeds each of them to a set of
  # "dimensions"; each dimension accumulates one statistic.
  class Analyzer
    # Accumulates a single scalar total for one metric.
    class Dimension
      # Metric name => lambda returning the contribution of one heap object.
      METRICS = {
        "objects" => -> (_object) { 1 },
        "memory" => -> (object) { object[:memsize].to_i },
      }.freeze

      attr_reader :stats

      # @param metric [String] one of the METRICS keys
      # @raise [KeyError] if the metric is unknown
      def initialize(metric)
        @stats = 0
        @metric = METRICS.fetch(metric)
      end

      # Adds the object's contribution to the running total.
      def process(_index, object)
        @stats += @metric.call(object)
      end

      # Intentionally a no-op: a scalar total has nothing to sort.
      def sort!
      end
    end

    # Accumulates one metric per group (file, location, gem or class).
    class GroupedDimension < Dimension
      # Grouping name => lambda returning the group key of an object, or nil
      # when the object cannot be attributed to any group.
      GROUPINGS = {
        "file" => -> (_index, object) { object[:file] },
        "location" => -> (_index, object) do
          if (file = object[:file]) && (line = object[:line])
            "#{file}:#{line}"
          end
        end,
        "gem" => -> (index, object) { index.guess_gem(object[:file]) },
        "class" => -> (index, object) { index.guess_class(object) },
      }.freeze

      # @param metric [String] one of the METRICS keys
      # @param grouping [String] one of the GROUPINGS keys
      # @raise [KeyError] if the metric or the grouping is unknown
      def initialize(metric, grouping)
        super(metric)
        @grouping = GROUPINGS.fetch(grouping)
        @stats = Hash.new { |h, k| h[k] = 0 }
      end

      # Objects without a group key are ignored.
      def process(index, object)
        if (group = @grouping.call(index, object))
          @stats[group] += @metric.call(object)
        end
      end

      # @return [Array<Array>] at most `max` `[group, value]` pairs, highest
      #   value first; ties are ordered by descending group key.
      def top_n(max)
        stats.sort do |a, b|
          cmp = b[1] <=> a[1]
          cmp == 0 ? b[0] <=> a[0] : cmp
        end.take(max)
      end
    end

    # Groups STRING objects by value and, per value, by allocation location.
    class StringDimension
      # Count and memory of one string value at one "file:line" location.
      class StringLocation
        attr_reader :location, :count, :memsize

        def initialize(location)
          @location = location
          @count = 0
          @memsize = 0
        end

        def process(object)
          @count += 1
          # `to_i` keeps objects without a :memsize from raising, matching
          # the "memory" metric of Dimension.
          @memsize += object[:memsize].to_i
        end
      end

      # Count and memory of every string sharing the same value.
      class StringGroup
        attr_reader :value, :count, :memsize

        def initialize(value) # TODO: should we consider encoding?
          @value = value
          @locations_counts = Hash.new { |h, k| h[k] = StringLocation.new(k) }
          @count = 0
          @memsize = 0
        end

        # @return [Array<StringLocation>] every location recorded so far
        def locations
          @locations_counts.values
        end

        def process(object)
          @count += 1
          @memsize += object[:memsize].to_i
          if (file = object[:file]) && (line = object[:line])
            @locations_counts["#{file}:#{line}"].process(object)
          end
        end

        # @return [Array<StringLocation>] at most `max` locations, highest
        #   count first; ties are ordered by descending location.
        def top_n(max)
          sorted = locations.sort do |a, b|
            cmp = b.count <=> a.count
            cmp == 0 ? b.location <=> a.location : cmp
          end
          sorted.take(max)
        end
      end

      attr_reader :stats

      def initialize
        @stats = Hash.new { |h, k| h[k] = StringGroup.new(k) }
      end

      # Only objects of type "STRING" that carry a value are recorded.
      def process(_index, object)
        return unless object[:type] == "STRING"
        value = object[:value]
        return unless value # broken strings etc
        @stats[value].process(object)
      end

      # @return [Array<StringGroup>] at most `max` groups, highest count
      #   first; ties are ordered by descending string value.
      def top_n(max)
        values = @stats.values
        values.sort! do |a, b|
          cmp = b.count <=> a.count
          cmp == 0 ? b.value <=> a.value : cmp
        end
        values.take(max)
      end
    end

    # @param heap  object responding to `each_object`, yielding one Hash per heap object
    # @param index object responding to `guess_gem(path)` and `guess_class(object)`
    def initialize(heap, index)
      @heap = heap
      @index = index
    end

    # Builds one dimension per requested metric / grouping and fills them in a
    # single pass over the heap.
    #
    # @param metrics [Array<String>] "strings" and/or Dimension::METRICS keys
    # @param groupings [Array<String>] GroupedDimension::GROUPINGS keys
    # @return [Hash{String => Object}] dimensions keyed by "strings",
    #   "total_<metric>" and "<metric>_by_<grouping>"
    def run(metrics, groupings)
      dimensions = {}
      metrics.each do |metric|
        if metric == "strings"
          dimensions["strings"] = StringDimension.new
        else
          dimensions["total_#{metric}"] = Dimension.new(metric)
          groupings.each do |grouping|
            dimensions["#{metric}_by_#{grouping}"] = GroupedDimension.new(metric, grouping)
          end
        end
      end

      processors = dimensions.values
      @heap.each_object do |object|
        processors.each { |processor| processor.process(@index, object) }
      end
      dimensions
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module HeapProfiler
  # Command line entry point: prints the report for a heap dump file or for a
  # report directory.
  class CLI
    # @param argv [Array<String>] command line arguments
    def initialize(argv)
      @argv = argv
    end

    # Prints the report when exactly one argument was given, the usage
    # message otherwise.
    #
    # @return [Integer] 0 after printing a report, 1 after printing the usage
    def run
      if @argv.size == 1
        print_report(@argv.first)
        0
      else
        print_usage
        1
      end
    end

    # A directory is handed to DiffResults, anything else to HeapResults.
    def print_report(path)
      results = if File.directory?(path)
        DiffResults.new(path)
      else
        HeapResults.new(path)
      end
      results.pretty_print(scale_bytes: true, normalize_paths: true)
    end

    def print_usage
      puts "Usage: #{$PROGRAM_NAME} directory_or_heap_dump"
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module HeapProfiler
  # Gives access to the dumps of a report directory and lazily derives the
  # "allocated-diff" and "retained-diff" dumps from them.
  class Diff
    attr_reader :allocated

    # @param report_directory [String] directory holding the `*.heap` dumps
    #   and the `generation.info` file
    # @raise [ArgumentError] if `generation.info` does not contain an integer
    def initialize(report_directory)
      @report_directory = report_directory
      @allocated = open_dump('allocated')
      @generation = Integer(File.read(File.join(report_directory, 'generation.info')))
    end

    # @return the "allocated-diff" dump, built from "allocated" if missing
    def allocated_diff
      @allocated_diff ||= build_diff('allocated-diff', @allocated)
    end

    # @return the "retained-diff" dump, built from "retained" if missing
    def retained_diff
      @retained_diff ||= build_diff('retained-diff', open_dump('retained'))
    end

    private

    # Opens the dump called `name`; when its file does not exist yet, asks
    # `base` to filter itself into that file, passing the recorded generation
    # as `since:`.
    def build_diff(name, base)
      diff = open_dump(name)
      unless diff.exist?
        base.filter(File.join(@report_directory, "#{name}.heap"), since: @generation)
      end
      diff
    end

    def open_dump(name)
      Dump.open(@report_directory, name)
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module HeapProfiler
  # A heap dump file on disk (one object per line).
  class Dump
    # Object count and cumulated memsize.
    class Stats
      attr_accessor :count, :memsize

      def initialize
        @count = 0
        @memsize = 0
      end

      # Objects without a :memsize count for 0 bytes.
      def process(object)
        @count += 1
        @memsize += object.fetch(:memsize, 0)
      end
    end

    # Stats for a whole dump, plus a breakdown per object type.
    class GlobalStats < Stats
      class << self
        # @param dump object responding to `each_object`
        # @return [GlobalStats] stats accumulated over every object of the dump
        def from(dump)
          stats = new
          dump.each_object do |object|
            stats.process(object)
          end
          stats
        end
      end

      def process(object)
        super
        per_type[object[:type]].process(object)
      end

      # Memoized so that the per-type stats accumulated by `process` are kept
      # between calls instead of being thrown away with a fresh Hash.
      #
      # @return [Hash{Object => Stats}] stats keyed by the object's :type
      def per_type
        @per_type ||= Hash.new { |h, k| h[k] = Stats.new }
      end
    end

    class << self
      # @return [Dump] the dump stored as "<name>.heap" inside `dir`
      def open(dir, name)
        Dump.new(File.join(dir, "#{name}.heap"))
      end
    end

    attr_reader :path

    def initialize(path)
      @path = path
    end

    # ObjectSpace.dump_all itself allocates objects.
    #
    # Before 2.7 it allocates one String per class to get its name.
    # Since 2.7 it only allocates a couple of hashes, a file, etc.
    #
    # Either way we need to exclude them from the reports: every line whose
    # address is not part of `other`'s index is appended to `file`.
    def diff(other, file)
      each_line_with_address do |line, address|
        file << line unless other.index.include?(address)
      end
    end

    def filter(as, since:)
      Native.filter_heap(path, as, since: since)
    end

    def each_object(&block)
      Native.load_many(path, batch_size: 10_000_000, &block)
    end

    def stats
      @stats ||= GlobalStats.from(self)
    end

    # @return [Integer] number of lines in the dump file
    def size
      # File.foreach closes the file once the enumeration is over.
      @size ||= File.foreach(path).count
    end

    def index
      @index ||= Native.addresses_set(path)
    end

    # Yields every line of the dump together with the address parsed from it.
    def each_line_with_address
      File.foreach(path) do |line|
        # This is a cheap, hacky extraction of addresses.
        # So far it seems to work on 2.7.1 but that might not hold true on all versions.
        # Also the root objects don't have an address, but that's fine.
        # `to_s` covers lines shorter than 14 bytes, for which byteslice returns nil.
        yield line, line.byteslice(14, 12).to_s.to_i(16)
      end
    end

    def exist?
      File.exist?(@path)
    end

    # @return [Dump, nil] self if the file exists, nil otherwise
    def presence
      exist? ? self : nil
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "heap_profiler/runtime"
4
+ require "heap_profiler/native"
5
+ require "heap_profiler/dump"
6
+ require "heap_profiler/index"
7
+ require "heap_profiler/diff"
8
+ require "heap_profiler/analyzer"
9
+ require "heap_profiler/polychrome"
10
+ require "heap_profiler/monochrome"
11
+ require "heap_profiler/results"
12
+ require "heap_profiler/cli"
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module HeapProfiler
  # Lookup tables built from a heap dump, used to name the class of an object,
  # resolve shared strings and attribute a file path to a gem.
  class Index
    # @param heap object responding to `path`
    def initialize(heap)
      @heap = heap
      @classes = {}
      @strings = {}
      @gems = {}
      build!
    end

    # (Re)loads the classes and strings tables from the dump file.
    #
    # @return [Index] self
    def build!
      @classes, @strings = Native.build_index(@heap.path)
      self
    end

    # Heap object :type => display name, for types that don't need a lookup.
    BUILTIN_CLASSES = {
      "FILE" => "File",
      "ICLASS" => "ICLASS",
      "COMPLEX" => "Complex",
      "RATIONAL" => "Rational",
      "BIGNUM" => "Bignum",
      "FLOAT" => "Float",
      "ARRAY" => "Array",
      "STRING" => "String",
      "HASH" => "Hash",
      "SYMBOL" => "Symbol",
      "MODULE" => "Module",
      "CLASS" => "Class",
      "REGEXP" => "Regexp",
      "MATCH" => "MatchData",
      "ROOT" => "<VM Root>",
    }.freeze

    # Memoizing hashes: the label is built once per key and then reused.
    IMEMO_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (IMEMO)" }
    DATA_TYPES = Hash.new { |h, k| h[k] = "<#{(k || 'unknown')}> (DATA)" }

    # @param object [Hash] a heap object
    # @return [String, nil] a display name for the object's class; nil when an
    #   OBJECT / STRUCT has no :class or its class is not in the index
    # @raise [RuntimeError] if the object's :type is not handled
    def guess_class(object)
      type = object[:type]
      if (class_name = BUILTIN_CLASSES[type])
        return class_name
      end

      return IMEMO_TYPES[object[:imemo_type]] if type == 'IMEMO'
      return DATA_TYPES[object[:struct]] if type == 'DATA'

      if type == "OBJECT" || type == "STRUCT"
        class_address = object[:class]
        return unless class_address

        return @classes.fetch(class_address) do
          $stderr.puts("WARNING: Couldn't infer class name of: #{object.inspect}")
          nil
        end
      end

      raise "[BUG] Couldn't infer type of #{object.inspect}"
    end

    # @return [String, nil] the object's own :value, or for a :shared string
    #   the entry of the strings table for its first reference
    def string_value(object)
      value = object[:value]
      return value if value

      if object[:shared]
        @strings[cast_address(object[:references].first)]
      end
    end

    # Parses an address as a base-16 Integer.
    def cast_address(address)
      address.to_s.to_i(16)
    end

    # Attributes a source path to a gem, rubygems, a stdlib library, an
    # application directory, or "other". Results are cached per path.
    def guess_gem(path)
      @gems[path] ||=
        if %r{(/gems/.*)*/gems/(?<gemname>[^/]+)} =~ path
          gemname
        elsif %r{/rubygems[\./]}.match?(path)
          "rubygems"
        # Any major version, so that stdlib paths of Ruby 3.x and later are
        # recognised as well as those of Ruby 2.x.
        elsif %r{ruby/\d+\.[^/]+/(?<stdlib>[^/\.]+)} =~ path
          stdlib
        elsif %r{(?<app>[^/]+/(bin|app|lib))} =~ path
          app
        else
          "other"
        end
    end
  end
end