heap-profiler 0.8.0.rc1-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/cibuildgem.yaml +87 -0
  3. data/.github/workflows/tests.yml +33 -0
  4. data/.gitignore +11 -0
  5. data/.rubocop.yml +29 -0
  6. data/.ruby-version +1 -0
  7. data/Gemfile +13 -0
  8. data/Gemfile.lock +69 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +291 -0
  11. data/Rakefile +17 -0
  12. data/TODO.md +3 -0
  13. data/benchmark/address-parsing.rb +15 -0
  14. data/benchmark/indexing.rb +17 -0
  15. data/bin/console +15 -0
  16. data/bin/generate-report +49 -0
  17. data/bin/rubocop +29 -0
  18. data/bin/setup +8 -0
  19. data/bin/testunit +9 -0
  20. data/dev.yml +20 -0
  21. data/exe/heap-profiler +5 -0
  22. data/ext/heap_profiler/extconf.rb +9 -0
  23. data/ext/heap_profiler/heap_profiler.cpp +335 -0
  24. data/ext/heap_profiler/simdjson.cpp +15047 -0
  25. data/ext/heap_profiler/simdjson.h +32071 -0
  26. data/heap-profiler.gemspec +31 -0
  27. data/lib/heap-profiler.rb +6 -0
  28. data/lib/heap_profiler/3.1/heap_profiler.so +0 -0
  29. data/lib/heap_profiler/3.2/heap_profiler.so +0 -0
  30. data/lib/heap_profiler/3.3/heap_profiler.so +0 -0
  31. data/lib/heap_profiler/3.4/heap_profiler.so +0 -0
  32. data/lib/heap_profiler/4.0/heap_profiler.so +0 -0
  33. data/lib/heap_profiler/analyzer.rb +232 -0
  34. data/lib/heap_profiler/cli.rb +140 -0
  35. data/lib/heap_profiler/diff.rb +39 -0
  36. data/lib/heap_profiler/dump.rb +97 -0
  37. data/lib/heap_profiler/full.rb +12 -0
  38. data/lib/heap_profiler/index.rb +89 -0
  39. data/lib/heap_profiler/monochrome.rb +19 -0
  40. data/lib/heap_profiler/parser.rb +83 -0
  41. data/lib/heap_profiler/polychrome.rb +93 -0
  42. data/lib/heap_profiler/reporter.rb +118 -0
  43. data/lib/heap_profiler/results.rb +256 -0
  44. data/lib/heap_profiler/runtime.rb +30 -0
  45. data/lib/heap_profiler/version.rb +4 -0
  46. metadata +94 -0
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'lib/heap_profiler/version'
3
+
4
# Gem specification for heap-profiler: identity, metadata, packaged files
# and the native extension to build at install time.
Gem::Specification.new do |gem|
  gem.name = "heap-profiler"
  gem.version = HeapProfiler::VERSION
  gem.authors = ["Jean Boussier"]
  gem.email = ["jean.boussier@gmail.com"]

  gem.summary = 'Ruby heap profiling tool'
  gem.description = 'Make several heap dumps and summarize allocated, retained memory'
  gem.homepage = "https://github.com/Shopify/heap-profiler"
  gem.license = "MIT"
  gem.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  gem.metadata["allowed_push_host"] = "https://rubygems.org/"

  gem.metadata["homepage_uri"] = gem.homepage
  gem.metadata["source_code_uri"] = gem.homepage

  # Package every file tracked by git (listed NUL-separated from the gemspec's
  # own directory), minus the test/spec/features trees.
  project_root = File.expand_path('..', __FILE__)
  gem.files = Dir.chdir(project_root) do
    tracked = %x(git ls-files -z).split("\x0")
    tracked.reject { |file| file.match(%r{^(test|spec|features)/}) }
  end

  # Everything under exe/ is exposed as an executable, by basename.
  gem.bindir = "exe"
  gem.executables = gem.files.grep(%r{^exe/}).map { |file| File.basename(file) }
  gem.require_paths = ["lib"]

  gem.extensions << "ext/heap_profiler/extconf.rb"
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is the gem entrypoint loaded by bundler.
4
+ # We only load what's needed to take heap dumps,
5
+ # not the code required to analyse them.
6
+ require "heap_profiler/runtime"
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
+ class Analyzer
5
# Accumulates a single running total: how many objects were processed and the
# sum of their :memsize values.
class Dimension
  attr_reader :objects, :memory

  def initialize
    @objects = 0
    @memory = 0
  end

  # Counts one more object and adds its :memsize to the memory total.
  # The index argument is accepted for interface parity and ignored.
  def process(_index, object)
    @objects += 1
    @memory += object[:memsize]
  end

  # Returns the accumulated value for "objects" or "memory".
  # Raises RuntimeError for any other metric name.
  def stats(metric)
    case metric
    when "objects" then objects
    when "memory" then memory
    else raise "Invalid metric: #{metric.inspect}"
    end
  end
end
28
+
29
# Dimension whose object counts and memory totals are kept per group key
# instead of as a single number.
class GroupedDimension < Dimension
  # Instantiates the dimension class matching the grouping name
  # ("file", "location", "gem" or "class"); raises RuntimeError otherwise.
  def self.build(grouping)
    dimension_class =
      case grouping
      when "file" then FileGroupDimension
      when "location" then LocationGroupDimension
      when "gem" then GemGroupDimension
      when "class" then ClassGroupDimension
      else raise "Unknown grouping key: #{grouping.inspect}"
      end
    dimension_class.new
  end

  def initialize
    @objects = Hash.new { |counts, group| counts[group] = 0 }
    @memory = Hash.new { |sizes, group| sizes[group] = 0 }
  end

  # Generic implementation driven by @grouping.
  # NOTE(review): nothing in this class assigns @grouping; the subclasses
  # visible in this file all override #process instead - confirm whether this
  # fallback is still reachable.
  def process(index, object)
    group = @grouping.call(index, object)
    return unless group

    @objects[group] += 1
    @memory[group] += object[:memsize]
  end

  # Returns up to `max` [group, value] pairs for the metric, highest value
  # first. The first pass orders by value only; the kept slice is then
  # re-sorted so that equal values are ordered by descending group key.
  def top_n(metric, max)
    by_value = stats(metric).sort { |(_, left), (_, right)| right <=> left }
    by_value.take(max).sort! do |(left_key, left_value), (right_key, right_value)|
      order = right_value <=> left_value
      if order == 0
        right_key <=> left_key
      else
        order
      end
    end
  end
end
72
+
73
# Groups objects by the value of their :file entry.
class FileGroupDimension < GroupedDimension
  # Objects without a :file are skipped.
  def process(_index, object)
    group = object[:file]
    return unless group

    @objects[group] += 1
    @memory[group] += object[:memsize]
  end
end
81
+
82
# Groups objects by "file:line".
class LocationGroupDimension < GroupedDimension
  # Objects missing either :file or :line are skipped.
  def process(_index, object)
    file = object[:file]
    line = object[:line]
    return unless file && line

    location = "#{file}:#{line}"
    @objects[location] += 1
    @memory[location] += object[:memsize]
  end
end
94
+
95
# Groups objects by the name returned from index.guess_gem.
class GemGroupDimension < GroupedDimension
  # Objects for which the index returns nil/false are skipped.
  def process(index, object)
    gem_name = index.guess_gem(object)
    return unless gem_name

    @objects[gem_name] += 1
    @memory[gem_name] += object[:memsize]
  end
end
103
+
104
# Groups objects by the name returned from index.guess_class.
class ClassGroupDimension < GroupedDimension
  # Objects for which the index returns nil/false are skipped.
  def process(index, object)
    class_name = index.guess_class(object)
    return unless class_name

    @objects[class_name] += 1
    @memory[class_name] += object[:memsize]
  end
end
112
+
113
# Aggregates :STRING objects by value, tracking for every distinct value how
# many copies exist, their cumulative memsize and where they were allocated.
class StringDimension
  # Count and cumulative memsize of one string value at one "file:line".
  class StringLocation
    attr_reader :location, :count, :memsize

    def initialize(location)
      @location = location
      @count = 0
      @memsize = 0
    end

    def process(object)
      @count += 1
      @memsize += object[:memsize]
    end
  end

  # Totals for one distinct string value, plus a per-location breakdown.
  class StringGroup
    # NOTE(review): :locations reads @locations, which this class never
    # assigns (the breakdown lives in @locations_counts), so it returns nil.
    attr_reader :value, :count, :memsize, :locations

    def initialize(value) # TODO: should we consider encoding?
      @value = value
      @locations_counts = Hash.new { |locations, key| locations[key] = StringLocation.new(key) }
      @count = 0
      @memsize = 0
    end

    # Adds the object to the totals and, when both :file and :line are
    # present, to the matching location bucket.
    def process(object)
      @count += 1
      @memsize += object[:memsize]

      file = object[:file]
      line = file && object[:line]
      return unless line

      @locations_counts["#{file}:#{line}"].process(object)
    end

    # Up to `max` locations, most frequent first; ties are ordered by
    # descending location string.
    def top_n(max)
      ranked = @locations_counts.values.sort do |left, right|
        order = right.count <=> left.count
        if order == 0
          right.location <=> left.location
        else
          order
        end
      end
      ranked.take(max)
    end
  end

  attr_reader :stats

  def initialize
    @stats = Hash.new { |groups, value| groups[value] = StringGroup.new(value) }
  end

  # Only :STRING objects that carry a :value are recorded.
  def process(_index, object)
    if object[:type] == :STRING && (value = object[:value]) # no value: broken strings etc
      @stats[value].process(object)
    end
  end

  # Up to `max` string groups, most duplicated first. The first pass orders
  # by count only; the kept slice is re-sorted so ties are ordered by
  # descending value.
  def top_n(max)
    by_count = @stats.values.sort { |left, right| right.count <=> left.count }
    by_count.take(max).sort! do |left, right|
      order = right.count <=> left.count
      if order == 0
        right.value <=> left.value
      else
        order
      end
    end
  end
end
181
+
182
# Counts how many processed objects carry each :edge_name value.
class ShapeEdgeDimension
  def initialize
    @stats = Hash.new(0)
  end

  # Objects without an :edge_name are ignored.
  def process(_index, object)
    edge = object[:edge_name]
    @stats[edge] += 1 if edge
  end

  # Up to `max` [name, count] pairs, highest count first; ties are ordered
  # by ascending name.
  def top_n(max)
    ranked = @stats.sort do |(left_name, left_count), (right_name, right_count)|
      order = right_count <=> left_count
      order == 0 ? left_name <=> right_name : order
    end
    ranked.take(max)
  end
end
204
+
205
# heap:  object whose #each_object yields every heap object to analyze.
# index: lookup helper handed to every dimension's #process.
def initialize(heap, index)
  @heap, @index = heap, index
end
209
+
210
# Builds one dimension per requested metric/grouping, feeds every heap object
# through all of them in a single pass, and returns the dimensions keyed by
# name.
#
# "strings" and "shape_edges" map to their dedicated dimensions; any other
# metric registers the "total" dimension plus one grouped dimension per entry
# of `groupings`.
def run(metrics, groupings)
  dimensions = metrics.each_with_object({}) do |metric, registry|
    case metric
    when "strings"
      registry["strings"] = StringDimension.new
    when "shape_edges"
      registry["shape_edges"] = ShapeEdgeDimension.new
    else
      registry["total"] = Dimension.new
      groupings.each do |grouping|
        registry[grouping] = GroupedDimension.build(grouping)
      end
    end
  end

  processors = dimensions.values
  @heap.each_object do |object|
    processors.each { |processor| processor.process(@index, object) }
  end

  dimensions
end
231
+ end
232
+ end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+ require 'optparse'
3
+
4
+ module HeapProfiler
5
# Command line front-end: parses global options, then dispatches to the
# `report` (default) or `clean` subcommand.
class CLI
  # Decimal multipliers accepted as the unit suffix of --batch-size.
  SIZE_UNITS = {
    'B' => 1,
    'K' => 1_000,
    'M' => 1_000_000,
    'G' => 1_000_000_000,
  }.freeze

  # argv: the raw argument array; options are consumed from it in place by #run.
  def initialize(argv)
    @argv = argv
  end

  # Runs the requested subcommand and returns the process exit status:
  # 0 on success, 1 after printing usage (unknown invocation, missing path,
  # or a CapacityError raised while producing the report).
  def run
    parser.parse!(@argv)

    begin
      command, path = @argv
      case command
      when "clean"
        # Without a path there is nothing to clean: fall through to usage
        # rather than calling the handler with nil.
        if path
          clean_dump(path)
          return 0
        end
      when "report"
        if path
          print_report(path)
          return 0
        end
      else
        # A lone argument is treated as the path for the default `report`.
        if @argv.size == 1
          print_report(command)
          return 0
        end
      end
    rescue CapacityError => error
      STDERR.puts(error.message)
      STDERR.puts("Current size: #{Parser.batch_size}B")
      STDERR.puts("Try increasing it with --batch-size")
      STDERR.puts
    end
    print_usage
    1
  end

  # Prints a report for `path`: a directory is read as a set of diff dumps
  # (restricted to "retained" when --retained-only was given), anything else
  # as a single heap dump.
  def print_report(path)
    results = if File.directory?(path)
      if @retained_only
        DiffResults.new(path, ["retained"])
      else
        DiffResults.new(path)
      end
    else
      HeapResults.new(path)
    end
    results.pretty_print(scale_bytes: true, normalize_paths: true)
  end

  # Copies `path` to "<path>.clean", dropping every line that is not valid
  # JSON. Progress and a summary are written to $stderr; reported line
  # numbers are 1-based.
  def clean_dump(path)
    require "json"
    errors = 0
    line_number = 0
    clean_path = "#{path}.clean"
    File.open(clean_path, "w+") do |output|
      File.open(path) do |input|
        input.each_line do |line|
          line_number += 1
          begin
            JSON.parse(line)
          rescue JSON::ParserError
            errors += 1
            $stderr.puts("Invalid JSON found on line #{line_number}. Skipping")
          else
            output.print(line)
          end
        end
      end
    end
    $stderr.puts("Processed #{line_number} lines, removed #{errors} invalid lines")
    $stderr.puts("Clean dump available at #{clean_path}")
  end

  # Prints the one-line usage followed by the option parser's help text.
  def print_usage
    puts "Usage: #{$PROGRAM_NAME} directory_or_heap_dump"
    puts parser.help
  end

  # Converts a size such as "10", "512K", "10MB" or "1g" into a byte count.
  #
  # Raises ArgumentError when the string is malformed, uses an unknown unit,
  # or exceeds 4G.
  def parse_byte_size(size_string)
    if (match = size_string.match(/\A(\d+)(\w)?B?\z/i))
      # Parse as a base-10 Integer: going through Float turns very long digit
      # strings into Infinity, which then fails with FloatDomainError instead
      # of the ArgumentError callers rescue. The explicit base keeps leading
      # zeros from being read as octal.
      digits = Integer(match[1], 10)
      base = 1
      unit = match[2]&.upcase
      if unit
        base = SIZE_UNITS.fetch(unit) { raise ArgumentError, "Unknown size unit: #{unit}" }
      end
      size = digits * base
      if size > 4_000_000_000
        raise ArgumentError, "Batch size can't be bigger than 4G"
      end
      size
    else
      raise ArgumentError, "#{size_string} is not a valid size"
    end
  end

  private

  # Lazily builds the OptionParser. Building it also resets
  # AbstractResults.top_entries_count to its default of 50.
  def parser
    @parser ||= OptionParser.new do |opts|
      opts.banner = <<~EOS
        Usage: heap-profiler [SUBCOMMAND] [ARGS]

        SUBCOMMANDS

        report: Produce a full memory report from the provided dump. (default)

        clean: Remove all malformed lines from the provided heap dump. Can be useful to workaround some ruby bugs.

        GLOBAL OPTIONS
      EOS
      opts.separator ""

      opts.on('-r', '--retained-only', 'Only compute report for memory retentions.') do
        @retained_only = true
      end

      HeapProfiler::AbstractResults.top_entries_count = 50
      opts.on("-m", "--max=NUM", Integer, "Max number of entries to output. (Defaults to 50)") do |arg|
        HeapProfiler::AbstractResults.top_entries_count = arg
      end

      batch_size_help = "Sets the simdjson parser batch size. " \
        "It must be larger than the largest JSON document in the heap dump, and defaults to 10MB."
      opts.on('--batch-size SIZE', batch_size_help) do |size_string|
        HeapProfiler::Parser.batch_size = parse_byte_size(size_string)
      rescue ArgumentError => error
        STDERR.puts "Invalid batch-size: #{error.message}"
        exit 1
      end
    end
  end
end
140
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Gives access to the dumps stored in a report directory, restricted to the
# generation recorded in its generation.info file.
class Diff
  # A dump file paired with the generation passed to the parser as `since:`.
  class DumpSubset
    def initialize(path, generation)
      @path = path
      @generation = generation
    end

    # Forwards the block to Parser.load_many for this path and generation.
    def each_object(&block)
      Parser.load_many(@path, since: @generation, &block)
    end
  end

  attr_reader :allocated

  # report_directory must contain generation.info; a recorded generation of 0
  # is stored as nil.
  def initialize(report_directory)
    @report_directory = report_directory
    @allocated = open_dump('allocated')
    generation = Integer(File.read(File.join(report_directory, 'generation.info')))
    @generation = generation == 0 ? nil : generation
  end

  # Memoized subset over the "allocated" dump.
  def allocated_diff
    @allocated_diff ||= DumpSubset.new(@allocated.path, @generation)
  end

  # Memoized subset over the "retained" dump.
  def retained_diff
    @retained_diff ||= begin
      retained = open_dump('retained')
      DumpSubset.new(retained.path, @generation)
    end
  end

  private

  def open_dump(name)
    Dump.open(@report_directory, name)
  end
end
39
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# A heap dump file on disk, read line by line, with helpers to iterate,
# count, index and diff its content.
class Dump
  # Running object count and cumulative memsize.
  class Stats
    attr_accessor :count, :memsize

    def initialize
      @count = 0
      @memsize = 0
    end

    # Objects without a :memsize entry count for 0 bytes.
    def process(object)
      @count += 1
      @memsize += object.fetch(:memsize, 0)
    end
  end

  # Overall totals plus a Stats breakdown per object :type.
  class GlobalStats < Stats
    class << self
      # Builds the stats by feeding every object yielded by dump.each_object.
      def from(dump)
        stats = new
        dump.each_object do |object|
          stats.process(object)
        end
        stats
      end
    end

    def process(object)
      super
      per_type[object[:type]].process(object)
    end

    # Memoized so the per-type counters accumulate across #process calls;
    # rebuilding the Hash on each call would discard them.
    def per_type
      @per_type ||= Hash.new { |h, k| h[k] = Stats.new }
    end
  end

  class << self
    # Returns the Dump for "<dir>/<name>.heap".
    def open(dir, name)
      Dump.new(File.join(dir, "#{name}.heap"))
    end
  end

  attr_reader :path

  def initialize(path)
    @path = path
  end

  # ObjectSpace.dump_all itself allocates objects.
  #
  # Before 2.7 it will allocate one String per class to get its name.
  # After 2.7, it only allocates a couple of hashes, a file etc.
  #
  # Either way we need to exclude them from the reports: every line whose
  # address is absent from other.index is appended to `file`.
  def diff(other, file)
    each_line_with_address do |line, address|
      file << line unless other.index.include?(address)
    end
  end

  def each_object(since: nil, &block)
    Parser.load_many(path, since: since, &block)
  end

  def stats
    @stats ||= GlobalStats.from(self)
  end

  # Number of lines in the dump file. File.foreach closes the file once the
  # enumeration completes.
  def size
    @size ||= File.foreach(path).count
  end

  def index
    @index ||= Native.addresses_set(path)
  end

  # Yields each raw line together with the integer parsed from the 12 hex
  # characters found at byte offset 14.
  def each_line_with_address
    # The block form of File.foreach closes the file when iteration ends.
    File.foreach(path) do |line|
      # This is a cheap, hacky extraction of addresses.
      # So far it seems to work on 2.7.1 but that might not hold true on all versions.
      # Also the root objects don't have an address, but that's fine
      yield line, line.byteslice(14, 12).to_i(16)
    end
  end

  def exist?
    File.exist?(@path)
  end

  # Returns self when the file exists, nil otherwise.
  def presence
    exist? ? self : nil
  end
end
97
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "heap_profiler/runtime"
4
+ require "heap_profiler/parser"
5
+ require "heap_profiler/dump"
6
+ require "heap_profiler/index"
7
+ require "heap_profiler/diff"
8
+ require "heap_profiler/analyzer"
9
+ require "heap_profiler/polychrome"
10
+ require "heap_profiler/monochrome"
11
+ require "heap_profiler/results"
12
+ require "heap_profiler/cli"
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Lookup tables built from a heap dump, used to name the class and the
# originating gem of individual objects.
class Index
  def initialize(heap)
    @heap = heap
    @classes = {}
    @strings = {}
    @gems = {}
    build!
  end

  # (Re)loads the class and string tables from the heap file via
  # Parser.build_index. Returns self.
  def build!
    @classes, @strings = Parser.build_index(@heap.path)
    self
  end

  # Display names for object types that are reported without consulting the
  # class table.
  BUILTIN_CLASSES = {
    FILE: "File",
    ICLASS: "ICLASS",
    COMPLEX: "Complex",
    RATIONAL: "Rational",
    BIGNUM: "Bignum",
    FLOAT: "Float",
    ARRAY: "Array",
    STRING: "String",
    HASH: "Hash",
    SYMBOL: "Symbol",
    MODULE: "Module",
    CLASS: "Class",
    REGEXP: "Regexp",
    MATCH: "MatchData",
    ROOT: "<VM Root>",
    SHAPE: "SHAPE",
  }.freeze

  # Label caches: each label is formatted once per key and then reused.
  IMEMO_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (IMEMO)" }
  DATA_TYPES = Hash.new { |h, k| h[k] = "<#{k || 'unknown'}> (DATA)" }

  # Returns a class name for the object, or nil when none can be determined.
  #
  # Builtin types and IMEMO objects are named from their type alone. Other
  # objects are looked up by :class address; :DATA objects whose class is
  # unknown or plain "Object" are labelled from their :struct instead.
  def guess_class(object)
    type = object[:type]
    if (class_name = BUILTIN_CLASSES[type])
      return class_name
    end

    return IMEMO_TYPES[object[:imemo_type]] if type == :IMEMO

    class_name = if (class_address = object[:class])
      @classes.fetch(class_address) do
        # `return` exits guess_class itself, not just the fetch block.
        return DATA_TYPES[object[:struct]] if type == :DATA

        $stderr.puts("WARNING: Couldn't infer class name of: #{object.inspect}")
        nil
      end
    end

    if type == :DATA && (class_name.nil? || class_name == "Object")
      DATA_TYPES[object[:struct]]
    else
      class_name
    end
  end

  # Returns the object's :value, or for a :shared string without one, the
  # indexed string at the address of its first reference.
  def string_value(object)
    value = object[:value]
    return value if value

    if object[:shared]
      @strings[Native.parse_address(object[:references].first)]
    end
  end

  # Classifies the object's :file path, caching the answer per path:
  # the gem directory name, "rubygems", the stdlib library name, an
  # "<dir>/(bin|app|lib)" application prefix, or "other".
  def guess_gem(object)
    path = object[:file]
    # The regexp literals must stay on the left of =~ for the named captures
    # to be assigned to local variables.
    @gems[path] ||=
      if %r{(/gems/.*)*/gems/(?<gemname>[^/]+)} =~ path
        gemname
      elsif %r{/rubygems[\./]}.match?(path)
        "rubygems"
      elsif %r{ruby/\d+\.[^/]+/(?<stdlib>[^/\.]+)} =~ path
        # Matches any Ruby version directory (2.x, 3.x, 4.x, ...), not only 2.x.
        stdlib
      elsif %r{(?<app>[^/]+/(bin|app|lib))} =~ path
        app
      else
        "other"
      end
  end
end
89
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeapProfiler
4
# Formatter that applies no styling: every helper returns its argument as is.
module Monochrome
  # Returns the path text unchanged.
  def self.path(text)
    text
  end

  # Returns the string text unchanged.
  def self.string(text)
    text
  end

  # Returns the line text unchanged.
  def self.line(text)
    text
  end
end
19
+ end