heap-profiler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/tests.yml +59 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +20 -0
- data/.travis.yml +6 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +51 -0
- data/LICENSE.txt +21 -0
- data/README.md +269 -0
- data/Rakefile +17 -0
- data/TODO.md +9 -0
- data/benchmark/address-parsing.rb +12 -0
- data/benchmark/indexing.rb +14 -0
- data/bin/console +15 -0
- data/bin/generate-report +34 -0
- data/bin/rubocop +29 -0
- data/bin/setup +8 -0
- data/bin/testunit +9 -0
- data/dev.yml +20 -0
- data/exe/heap-profiler +6 -0
- data/ext/heap_profiler/extconf.rb +7 -0
- data/ext/heap_profiler/heap_profiler.cpp +262 -0
- data/ext/heap_profiler/simdjson.cpp +17654 -0
- data/ext/heap_profiler/simdjson.h +7716 -0
- data/heap-profiler.gemspec +31 -0
- data/lib/heap-profiler.rb +6 -0
- data/lib/heap_profiler/analyzer.rb +147 -0
- data/lib/heap_profiler/cli.rb +32 -0
- data/lib/heap_profiler/diff.rb +35 -0
- data/lib/heap_profiler/dump.rb +101 -0
- data/lib/heap_profiler/full.rb +12 -0
- data/lib/heap_profiler/index.rb +89 -0
- data/lib/heap_profiler/monochrome.rb +19 -0
- data/lib/heap_profiler/native.rb +48 -0
- data/lib/heap_profiler/polychrome.rb +93 -0
- data/lib/heap_profiler/reporter.rb +107 -0
- data/lib/heap_profiler/results.rb +212 -0
- data/lib/heap_profiler/runtime.rb +29 -0
- data/lib/heap_profiler/version.rb +6 -0
- metadata +86 -0
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require "rake/testtask"
|
4
|
+
require "rake/extensiontask"
|
5
|
+
|
6
|
+
# Extension task for the "heap_profiler" native code.
Rake::ExtensionTask.new("heap_profiler") do |extension|
  extension.ext_dir = "ext/heap_profiler"
  extension.lib_dir = "lib/heap_profiler"
end

# Test task: puts test/ and lib/ on the load path and picks up every
# *_test.rb file below test/.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Default task depends on :compile, then :test.
task(default: [:compile, :test])
|
data/TODO.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "benchmark/ips"
|
6
|
+
require "heap_profiler/full"
|
7
|
+
|
8
|
+
# Benchmarks HeapProfiler::Native.ruby_parse_address against
# HeapProfiler::Native.parse_address on the same sample address.
Benchmark.ips do |bench|
  bench.report("ruby") do
    HeapProfiler::Native.ruby_parse_address("0x7f921e88a8f8")
  end

  bench.report("cpp") do
    HeapProfiler::Native.parse_address("0x7f921e88a8f8")
  end

  bench.compare!
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "benchmark/ips"
|
6
|
+
require "heap_profiler/full"
|
7
|
+
|
8
|
+
# Heap dump fixture indexed by both implementations below.
FIXTURE_PATH = File.expand_path("../../test/fixtures/diffed-heap/allocated.heap", __FILE__)

# Benchmarks HeapProfiler::Native.ruby_build_index against
# HeapProfiler::Native.build_index on the same fixture.
Benchmark.ips do |bench|
  bench.report("ruby") do
    HeapProfiler::Native.ruby_build_index(FIXTURE_PATH)
  end

  bench.report("cpp") do
    HeapProfiler::Native.build_index(FIXTURE_PATH)
  end

  bench.compare!
end
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "heap-profiler"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
# Start an interactive IRB session with the gem already required above.
IRB.start(__FILE__)
|
data/bin/generate-report
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
require 'bundler/setup'
|
4
|
+
require "heap-profiler"
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
# Empty class; an instance of it is created inside the profiled block below.
class SomeCustomStuff
end

# Created before profiling starts; the global is reset to nil inside the
# profiled block.
$freed_objects = [+"i am free", +"i am free too"]

# First argument: directory handed to HeapProfiler.report. Its existing
# contents are deleted, so refuse to continue without an explicit, non-empty
# path instead of failing later inside FileUtils.
dir = ARGV.first
abort("usage: #{$PROGRAM_NAME} DIRECTORY [--empty]") if dir.nil? || dir.empty?

FileUtils.mkdir_p(dir)
FileUtils.rm_rf(Dir[File.join(dir, '*')])

# With `--empty` as second argument an empty block is profiled; otherwise the
# block below is profiled. Its statements are kept exactly as they were.
if ARGV[1] == '--empty'
  HeapProfiler.report(dir) {}
else
  HeapProfiler.report(dir) do
    $retained_objects = [
      +"I am retained",
      +"I am retained",
      +"I am retained too",
      SomeCustomStuff.new,
    ]
    "I am a very very long string " * 4
    [
      +"I am allocated",
      +"I am allocated too",
    ]
    $freed_objects = nil
    Struct.new("foo".to_sym, :bar)
  end
end
|
data/bin/rubocop
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rubocop' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
# Use the Gemfile two directories above this script's real path unless
# BUNDLE_GEMFILE is already set.
unless ENV["BUNDLE_GEMFILE"]
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", Pathname.new(__FILE__).realpath)
end

bundle_binstub = File.expand_path("../bundle", __FILE__)

# If a `bundle` file sits next to this script, load it, but only when its
# first 300 bytes carry the Bundler marker; otherwise abort with instructions.
if File.file?(bundle_binstub)
  binstub_header = File.read(bundle_binstub, 300)
  unless binstub_header =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
  load(bundle_binstub)
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
# Resolve the "rubocop" executable of the "rubocop" gem and run it in this process.
load Gem.bin_path("rubocop", "rubocop")
|
data/bin/setup
ADDED
data/bin/testunit
ADDED
data/dev.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
name: heap-profiler
|
2
|
+
|
3
|
+
type: ruby
|
4
|
+
|
5
|
+
up:
|
6
|
+
- ruby: 2.7.1
|
7
|
+
- bundler
|
8
|
+
|
9
|
+
commands:
|
10
|
+
console:
|
11
|
+
desc: 'start a console'
|
12
|
+
run: bin/console
|
13
|
+
run:
|
14
|
+
desc: 'start the application'
|
15
|
+
run: bin/run
|
16
|
+
test:
|
17
|
+
syntax:
|
18
|
+
argument: file
|
19
|
+
optional: args...
|
20
|
+
run: bin/testunit
|
data/exe/heap-profiler
ADDED
@@ -0,0 +1,262 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "simdjson.h"
|
3
|
+
#include <fstream>
|
4
|
+
|
5
|
+
using namespace simdjson;
|
6
|
+
|
7
|
+
static VALUE rb_eHeapProfilerError, sym_type, sym_class, sym_address, sym_value,
|
8
|
+
sym_memsize, sym_imemo_type, sym_struct, sym_file, sym_line, sym_shared,
|
9
|
+
sym_references;
|
10
|
+
|
11
|
+
static dom::parser parser;
|
12
|
+
|
13
|
+
// Maps a byte to the value of the hexadecimal digit it encodes:
// '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15, every other byte -> 0.
const uint64_t digittoval[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,
    9,  0,  0,  0,  0,  0,  0,  0,  10, 11, 12, 13, 14, 15, 0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  10, 11, 12, 13, 14, 15, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0};

// Inspired by https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
// Ruby addresses in heap dumps are hexadecimal strings "0x000000000000".."0xffffffffffff".
// The format being fairly stable allows for faster parsing. It should be equivalent to String#to_i(16).
//
// `address` must point to at least 14 readable bytes: a two-byte prefix, which is
// not inspected, followed by 12 hexadecimal digits. Bytes that are not
// hexadecimal digits contribute 0.
static inline uint64_t parse_address(const char * address) {
    // Index the table with unsigned bytes: plain `char` may be signed, in which
    // case a byte >= 0x80 would turn into a negative, out-of-bounds index.
    const unsigned char * digits = reinterpret_cast<const unsigned char *>(address);
    return (
        digittoval[digits[ 2]] << 44 |
        digittoval[digits[ 3]] << 40 |
        digittoval[digits[ 4]] << 36 |
        digittoval[digits[ 5]] << 32 |
        digittoval[digits[ 6]] << 28 |
        digittoval[digits[ 7]] << 24 |
        digittoval[digits[ 8]] << 20 |
        digittoval[digits[ 9]] << 16 |
        digittoval[digits[10]] << 12 |
        digittoval[digits[11]] <<  8 |
        digittoval[digits[12]] <<  4 |
        digittoval[digits[13]]
    );
}
|
48
|
+
|
49
|
+
// Parses the address stored in a JSON element.
//
// Returns 0 when the element cannot be read as a string (the ROOT object case).
// Otherwise the first two characters are skipped and the remaining characters are
// folded as hexadecimal digits, most significant first, using `digittoval`.
// For the usual 14-character form this yields the same value as the fixed-width
// parser. Other lengths are handled by the same loop, so correctness no longer
// depends on an `assert`, which is compiled out when NDEBUG is defined and
// aborts the process otherwise.
static inline int64_t parse_address(dom::element element) {
    std::string_view address;
    if (element.get(address)) {
        return 0; // ROOT object
    }

    uint64_t value = 0;
    for (size_t index = 2; index < address.size(); index++) {
        // Cast first: plain `char` may be signed and must not become a negative index.
        value = (value << 4) | digittoval[static_cast<unsigned char>(address[index])];
    }
    return static_cast<int64_t>(value);
}
|
57
|
+
|
58
|
+
// Native._build_index(path, batch_size)
//
// Streams the JSON documents of the file at `path` and returns a two-element
// Array [class_index, string_index]:
//   * class_index:  address => name,  for "CLASS" and "MODULE" entries that have a "name"
//   * string_index: address => value, for "STRING" entries that have a "value"
// Entries without a string "type" are skipped.
//
// Raises HeapProfiler::Error when the parser reports an error. The raise is
// deferred until after the try block: rb_raise unwinds with longjmp, which must
// not jump over the C++ objects that are alive inside the block.
static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
    Check_Type(path, T_STRING);
    Check_Type(batch_size, T_FIXNUM);

    // StringValueCStr yields a NUL-terminated buffer and rejects embedded NUL
    // bytes; it may raise, so it runs before any C++ object is created.
    const char * file_path = StringValueCStr(path);
    const int batch = FIX2INT(batch_size);

    VALUE string_index = rb_hash_new();
    VALUE class_index = rb_hash_new();

    error_code failure = SUCCESS;
    bool thrown = false; // selects the "exc: " message prefix used for exceptions

    try {
        auto [objects, error] = parser.load_many(file_path, batch);
        failure = error;

        if (error == SUCCESS) {
            for (dom::object object : objects) {
                std::string_view type;
                if (object["type"].get(type)) {
                    continue;
                }

                if (type == "STRING") {
                    std::string_view value;
                    if (!object["value"].get(value)) {
                        VALUE address = INT2FIX(parse_address(object["address"]));
                        VALUE string = rb_utf8_str_new(value.data(), value.size());
                        rb_hash_aset(string_index, address, string);
                    }
                } else if (type == "CLASS" || type == "MODULE") {
                    std::string_view name;
                    if (!object["name"].get(name)) {
                        VALUE address = INT2FIX(parse_address(object["address"]));
                        VALUE class_name = rb_utf8_str_new(name.data(), name.size());
                        rb_hash_aset(class_index, address, class_name);
                    }
                }
            }
        }
    } catch (const simdjson::simdjson_error & error) {
        // what() of a simdjson_error is error_message(error()), so keeping the
        // code is enough to rebuild the message once the C++ scope is closed.
        failure = error.error();
        thrown = true;
    }

    if (failure != SUCCESS) {
        if (thrown) {
            rb_raise(rb_eHeapProfilerError, "exc: %s", error_message(failure));
        }
        rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
    }

    VALUE return_value = rb_ary_new();
    rb_ary_push(return_value, class_index);
    rb_ary_push(return_value, string_index);
    return return_value;
}
|
104
|
+
|
105
|
+
// Native.parse_address(address)
//
// Converts a "0x…" hexadecimal address String into an Integer. The first two
// bytes are skipped and the remaining bytes are folded as hexadecimal digits via
// `digittoval` (bytes that are not hexadecimal digits count as 0).
//
// Raises TypeError (via Check_Type) when `address` is not a String and
// ArgumentError when its length is outside 3..18 bytes, i.e. a two-byte prefix
// plus 1 to 16 digits. The length used to be guarded only by an `assert`, which
// either aborts the whole process or, under NDEBUG, lets the parser read past
// the end of the string.
static VALUE rb_heap_parse_address(VALUE self, VALUE address) {
    Check_Type(address, T_STRING);

    const long length = RSTRING_LEN(address);
    if (length < 3 || length > 18) {
        rb_raise(rb_eArgError, "invalid address length: %ld", length);
    }

    const unsigned char * bytes = reinterpret_cast<const unsigned char *>(RSTRING_PTR(address));
    uint64_t value = 0;
    for (long index = 2; index < length; index++) {
        value = (value << 4) | digittoval[bytes[index]];
    }

    // ULL2NUM stays correct for values that do not fit in a Fixnum.
    return ULL2NUM(value);
}
|
110
|
+
|
111
|
+
// Builds a Ruby Hash from one parsed heap-dump entry.
//
// Keys are only set when the corresponding JSON field is present with the
// expected type:
//   :type, :file                   -> String
//   :address, :class               -> Integer parsed from the "0x…" string
//   :memsize, :line                -> Integer
//   :imemo_type  (type "IMEMO")    -> String
//   :struct      (type "DATA")     -> String
//   :value       (type "STRING")   -> String
//   :shared      (type "STRING")   -> true, or nil when the flag is false
//   :references  (shared strings)  -> Array of Integer addresses
//
// Reading "references" converts the field to a dom::array; that conversion
// throws simdjson_error when it fails, which the caller is expected to handle.
static VALUE make_ruby_object(dom::object object)
{
    // Converts an address string to a Ruby Integer. Only bytes inside the view
    // are read, so a string shorter than the usual 14 characters cannot cause
    // an out-of-bounds read. The first two characters (the prefix) are skipped.
    const auto address_to_value = [](std::string_view text) -> VALUE {
        uint64_t value = 0;
        for (size_t index = 2; index < text.size(); index++) {
            value = (value << 4) | digittoval[static_cast<unsigned char>(text[index])];
        }
        return ULL2NUM(value);
    };

    VALUE hash = rb_hash_new();

    std::string_view type;
    if (!object["type"].get(type)) {
        rb_hash_aset(hash, sym_type, rb_utf8_str_new(type.data(), type.size()));
    }

    std::string_view address;
    if (!object["address"].get(address)) {
        rb_hash_aset(hash, sym_address, address_to_value(address));
    }

    std::string_view _class;
    if (!object["class"].get(_class)) {
        rb_hash_aset(hash, sym_class, address_to_value(_class));
    }

    uint64_t memsize;
    if (!object["memsize"].get(memsize)) {
        rb_hash_aset(hash, sym_memsize, INT2FIX(memsize));
    }

    // `type` stays empty when the field was missing, so none of these match.
    if (type == "IMEMO") {
        std::string_view imemo_type;
        if (!object["imemo_type"].get(imemo_type)) {
            rb_hash_aset(hash, sym_imemo_type, rb_utf8_str_new(imemo_type.data(), imemo_type.size()));
        }
    } else if (type == "DATA") {
        std::string_view _struct;
        if (!object["struct"].get(_struct)) {
            rb_hash_aset(hash, sym_struct, rb_utf8_str_new(_struct.data(), _struct.size()));
        }
    } else if (type == "STRING") {
        std::string_view value;
        if (!object["value"].get(value)) {
            rb_hash_aset(hash, sym_value, rb_utf8_str_new(value.data(), value.size()));
        }

        bool shared;
        if (!object["shared"].get(shared)) {
            rb_hash_aset(hash, sym_shared, shared ? Qtrue : Qnil);
            if (shared) {
                VALUE references = rb_ary_new();
                dom::array reference_elements(object["references"]);
                for (dom::element reference_element : reference_elements) {
                    std::string_view reference;
                    if (!reference_element.get(reference)) {
                        rb_ary_push(references, address_to_value(reference));
                    }
                }
                rb_hash_aset(hash, sym_references, references);
            }
        }
    }

    std::string_view file;
    if (!object["file"].get(file)) {
        rb_hash_aset(hash, sym_file, rb_utf8_str_new(file.data(), file.size()));
    }

    uint64_t line;
    if (!object["line"].get(line)) {
        rb_hash_aset(hash, sym_line, INT2FIX(line));
    }

    return hash;
}
|
180
|
+
|
181
|
+
// Native._load_many(path, batch_size) { |hash| ... }
//
// Streams the JSON documents of the file at `arg` and yields one Hash (built by
// make_ruby_object) per document. Returns nil.
//
// Raises HeapProfiler::Error when the parser reports an error. Exceptions and
// other non-local exits coming from the block (for example `break`) are
// propagated unchanged.
//
// Ruby unwinds with longjmp, which must not jump over live C++ objects. The
// block is therefore called through rb_protect, and both re-raising and
// rb_raise happen only after the try block has been left.
static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE batch_size)
{
    Check_Type(arg, T_STRING);
    Check_Type(batch_size, T_FIXNUM);

    // StringValueCStr yields a NUL-terminated buffer and rejects embedded NUL
    // bytes; it may raise, so it runs before any C++ object is created.
    const char * file_path = StringValueCStr(arg);
    const int batch = FIX2INT(batch_size);

    error_code failure = SUCCESS;
    int ruby_state = 0; // non-zero once the block left through an exception/break/throw

    try
    {
        auto [docs, error] = parser.load_many(file_path, batch);
        failure = error;

        if (error == SUCCESS)
        {
            for (dom::element doc : docs)
            {
                rb_protect(rb_yield, make_ruby_object(doc), &ruby_state);
                if (ruby_state != 0)
                {
                    break;
                }
            }
        }
    }
    catch (const simdjson::simdjson_error & error)
    {
        // what() of a simdjson_error is error_message(error()), so keeping the
        // code is enough to rebuild the message after the scope is closed.
        failure = error.error();
    }

    if (ruby_state != 0)
    {
        rb_jump_tag(ruby_state); // resume the pending Ruby exception / break / throw
    }
    if (failure != SUCCESS)
    {
        rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
    }

    return Qnil;
}
|
206
|
+
|
207
|
+
// Native._filter_heap(source_path, destination_path, generation)
//
// Copies, line by line, the entries of the dump at `source_path` to
// `destination_path`, keeping only lines that
//   * have an integer "generation" greater than or equal to `generation`, and
//   * do not have "file" equal to "__hprof".
// Returns the number of lines written.
//
// Raises HeapProfiler::Error when either file cannot be opened or when a line
// cannot be parsed. A parse failure used to escape as an unhandled C++ exception.
// All raising happens after the streams have been destroyed, because rb_raise
// unwinds with longjmp and would skip their destructors.
static VALUE rb_heap_filter(VALUE self, VALUE source_path, VALUE destination_path, VALUE _generation)
{
    Check_Type(source_path, T_STRING);
    Check_Type(destination_path, T_STRING);
    Check_Type(_generation, T_FIXNUM);
    int64_t generation = FIX2INT(_generation);

    // StringValueCStr yields NUL-terminated buffers and rejects embedded NUL
    // bytes; it may raise, so it runs before any C++ object is created.
    const char * source = StringValueCStr(source_path);
    const char * destination = StringValueCStr(destination_path);

    int count = 0;
    error_code failure = SUCCESS;
    const char * unopened = NULL; // path of the file that could not be opened, if any

    {
        std::ifstream input(source);
        std::ofstream output;

        if (!input.is_open()) {
            unopened = source;
        } else {
            // Only create/truncate the destination once the source is readable.
            output.open(destination, std::ofstream::out);
            if (!output.is_open()) {
                unopened = destination;
            }
        }

        if (unopened == NULL) {
            try {
                for (std::string line; std::getline(input, line);) {
                    int64_t object_generation;
                    dom::element object = parser.parse(line);
                    if (object["generation"].get(object_generation) || object_generation < generation) {
                        continue;
                    }

                    std::string_view file;
                    if (!object["file"].get(file) && file == "__hprof") {
                        continue;
                    }

                    count += 1;
                    // '\n' instead of std::endl: no flush after every line.
                    output << line << '\n';
                }
            } catch (const simdjson::simdjson_error & error) {
                failure = error.error();
            }
            output.close();
        }
    }

    if (unopened != NULL) {
        rb_raise(rb_eHeapProfilerError, "unable to open %s", unopened);
    }
    if (failure != SUCCESS) {
        rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
    }

    return INT2FIX(count);
}
|
236
|
+
|
237
|
+
extern "C" {
|
238
|
+
void Init_heap_profiler(void) {
|
239
|
+
sym_type = ID2SYM(rb_intern("type"));
|
240
|
+
sym_class = ID2SYM(rb_intern("class"));
|
241
|
+
sym_address = ID2SYM(rb_intern("address"));
|
242
|
+
sym_value = ID2SYM(rb_intern("value"));
|
243
|
+
sym_memsize = ID2SYM(rb_intern("memsize"));
|
244
|
+
sym_struct = ID2SYM(rb_intern("struct"));
|
245
|
+
sym_imemo_type = ID2SYM(rb_intern("imemo_type"));
|
246
|
+
sym_file = ID2SYM(rb_intern("file"));
|
247
|
+
sym_line = ID2SYM(rb_intern("line"));
|
248
|
+
sym_shared = ID2SYM(rb_intern("shared"));
|
249
|
+
sym_references = ID2SYM(rb_intern("references"));
|
250
|
+
|
251
|
+
VALUE rb_mHeapProfiler = rb_const_get(rb_cObject, rb_intern("HeapProfiler"));
|
252
|
+
VALUE rb_mHeapProfilerNative = rb_const_get(rb_mHeapProfiler, rb_intern("Native"));
|
253
|
+
|
254
|
+
rb_eHeapProfilerError = rb_const_get(rb_mHeapProfiler, rb_intern("Error"));
|
255
|
+
rb_global_variable(&rb_eHeapProfilerError);
|
256
|
+
|
257
|
+
rb_define_module_function(rb_mHeapProfilerNative, "_build_index", reinterpret_cast<VALUE (*)(...)>(rb_heap_build_index), 2);
|
258
|
+
rb_define_module_function(rb_mHeapProfilerNative, "parse_address", reinterpret_cast<VALUE (*)(...)>(rb_heap_parse_address), 1);
|
259
|
+
rb_define_module_function(rb_mHeapProfilerNative, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_heap_load_many), 2);
|
260
|
+
rb_define_module_function(rb_mHeapProfilerNative, "_filter_heap", reinterpret_cast<VALUE (*)(...)>(rb_heap_filter), 3);
|
261
|
+
}
|
262
|
+
}
|