heap-profiler 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/tests.yml +2 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +177 -169
- data/TODO.md +0 -6
- data/benchmark/address-parsing.rb +5 -2
- data/benchmark/indexing.rb +5 -2
- data/bin/generate-report +2 -0
- data/exe/heap-profiler +0 -1
- data/ext/heap_profiler/heap_profiler.cpp +95 -81
- data/heap-profiler.gemspec +1 -1
- data/lib/heap_profiler/analyzer.rb +95 -33
- data/lib/heap_profiler/diff.rb +13 -10
- data/lib/heap_profiler/dump.rb +2 -6
- data/lib/heap_profiler/full.rb +1 -1
- data/lib/heap_profiler/index.rb +27 -33
- data/lib/heap_profiler/{native.rb → parser.rb} +34 -19
- data/lib/heap_profiler/results.rb +7 -7
- data/lib/heap_profiler/version.rb +1 -1
- metadata +5 -5
data/bin/generate-report
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'bundler/setup'
|
4
4
|
require "heap-profiler"
|
5
5
|
require 'fileutils'
|
6
|
+
require 'date'
|
6
7
|
|
7
8
|
class SomeCustomStuff
|
8
9
|
end
|
@@ -22,6 +23,7 @@ else
|
|
22
23
|
+"I am retained",
|
23
24
|
+"I am retained too",
|
24
25
|
SomeCustomStuff.new,
|
26
|
+
Date.today,
|
25
27
|
]
|
26
28
|
"I am a very very long string " * 4
|
27
29
|
[
|
data/exe/heap-profiler
CHANGED
data/ext/heap_profiler/heap_profiler.cpp
CHANGED
@@ -8,7 +8,37 @@ static VALUE rb_eHeapProfilerError, sym_type, sym_class, sym_address, sym_value,
|
|
8
8
|
sym_memsize, sym_imemo_type, sym_struct, sym_file, sym_line, sym_shared,
|
9
9
|
sym_references;
|
10
10
|
|
11
|
-
|
11
|
+
typedef struct {
|
12
|
+
dom::parser *parser;
|
13
|
+
} parser_t;
|
14
|
+
|
15
|
+
static void Parser_delete(void *ptr) {
|
16
|
+
parser_t *data = (parser_t*) ptr;
|
17
|
+
delete data->parser;
|
18
|
+
}
|
19
|
+
|
20
|
+
static size_t Parser_memsize(const void *parser) {
|
21
|
+
return sizeof(dom::parser); // TODO: low priority, figure the real size, e.g. internal buffers etc.
|
22
|
+
}
|
23
|
+
|
24
|
+
static const rb_data_type_t parser_data_type = {
|
25
|
+
"Parser",
|
26
|
+
{ 0, Parser_delete, Parser_memsize, },
|
27
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
28
|
+
};
|
29
|
+
|
30
|
+
static VALUE parser_allocate(VALUE klass) {
|
31
|
+
parser_t *data;
|
32
|
+
VALUE obj = TypedData_Make_Struct(klass, parser_t, &parser_data_type, data);
|
33
|
+
data->parser = new dom::parser;
|
34
|
+
return obj;
|
35
|
+
}
|
36
|
+
|
37
|
+
static inline dom::parser * get_parser(VALUE self) {
|
38
|
+
parser_t *data;
|
39
|
+
TypedData_Get_Struct(self, parser_t, &parser_data_type, data);
|
40
|
+
return data->parser;
|
41
|
+
}
|
12
42
|
|
13
43
|
const uint64_t digittoval[256] = {
|
14
44
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -29,41 +59,40 @@ const uint64_t digittoval[256] = {
|
|
29
59
|
// Inspired by https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
|
30
60
|
// Ruby addresses in heap dump are hexadecimal strings "0x000000000000"...0xffffffffffff".
|
31
61
|
// The format being fairly stable allow for faster parsing. It should be equivalent to String#to_i(16).
|
32
|
-
static inline uint64_t parse_address(const char * address) {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
digittoval[address[
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
);
|
62
|
+
static inline uint64_t parse_address(const char * address, const long size) {
|
63
|
+
assert(address[0] == '0');
|
64
|
+
assert(address[1] == 'x');
|
65
|
+
|
66
|
+
uint64_t value = 0;
|
67
|
+
for (int index = 2; index < size; index++) {
|
68
|
+
value <<= 4;
|
69
|
+
value |= digittoval[address[index]];
|
70
|
+
}
|
71
|
+
return value;
|
72
|
+
}
|
73
|
+
|
74
|
+
static inline int64_t parse_address(std::string_view address) {
|
75
|
+
return parse_address(address.data(), address.size());
|
47
76
|
}
|
48
77
|
|
49
|
-
static inline int64_t
|
78
|
+
static inline int64_t parse_dom_address(dom::element element) {
|
50
79
|
std::string_view address;
|
51
80
|
if (element.get(address)) {
|
52
81
|
return 0; // ROOT object
|
53
82
|
}
|
54
|
-
|
55
|
-
return parse_address(address.data());
|
83
|
+
return parse_address(address);
|
56
84
|
}
|
57
85
|
|
58
86
|
static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
|
59
87
|
Check_Type(path, T_STRING);
|
60
88
|
Check_Type(batch_size, T_FIXNUM);
|
89
|
+
dom::parser *parser = get_parser(self);
|
61
90
|
|
62
91
|
VALUE string_index = rb_hash_new();
|
63
92
|
VALUE class_index = rb_hash_new();
|
64
93
|
|
65
94
|
try {
|
66
|
-
auto [objects, error] = parser
|
95
|
+
auto [objects, error] = parser->load_many(RSTRING_PTR(path), FIX2INT(batch_size));
|
67
96
|
if (error != SUCCESS) {
|
68
97
|
rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
|
69
98
|
}
|
@@ -77,22 +106,20 @@ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
|
|
77
106
|
if (type == "STRING") {
|
78
107
|
std::string_view value;
|
79
108
|
if (!object["value"].get(value)) {
|
80
|
-
VALUE address = INT2FIX(
|
109
|
+
VALUE address = INT2FIX(parse_dom_address(object["address"]));
|
81
110
|
VALUE string = rb_utf8_str_new(value.data(), value.size());
|
82
111
|
rb_hash_aset(string_index, address, string);
|
83
112
|
}
|
84
113
|
} else if (type == "CLASS" || type == "MODULE") {
|
85
114
|
std::string_view name;
|
86
115
|
if (!object["name"].get(name)) {
|
87
|
-
VALUE address = INT2FIX(
|
116
|
+
VALUE address = INT2FIX(parse_dom_address(object["address"]));
|
88
117
|
VALUE class_name = rb_utf8_str_new(name.data(), name.size());
|
89
118
|
rb_hash_aset(class_index, address, class_name);
|
90
119
|
}
|
91
120
|
}
|
92
121
|
}
|
93
|
-
}
|
94
|
-
catch (simdjson::simdjson_error error)
|
95
|
-
{
|
122
|
+
} catch (simdjson::simdjson_error error) {
|
96
123
|
rb_raise(rb_eHeapProfilerError, "exc: %s", error.what());
|
97
124
|
}
|
98
125
|
|
@@ -104,8 +131,7 @@ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
|
|
104
131
|
|
105
132
|
static VALUE rb_heap_parse_address(VALUE self, VALUE address) {
|
106
133
|
Check_Type(address, T_STRING);
|
107
|
-
|
108
|
-
return INT2FIX(parse_address(RSTRING_PTR(address)));
|
134
|
+
return INT2FIX(parse_address(RSTRING_PTR(address), RSTRING_LEN(address)));
|
109
135
|
}
|
110
136
|
|
111
137
|
static VALUE make_ruby_object(dom::object object)
|
@@ -114,33 +140,36 @@ static VALUE make_ruby_object(dom::object object)
|
|
114
140
|
|
115
141
|
std::string_view type;
|
116
142
|
if (!object["type"].get(type)) {
|
117
|
-
rb_hash_aset(hash, sym_type,
|
143
|
+
rb_hash_aset(hash, sym_type, ID2SYM(rb_intern2(type.data(), type.size())));
|
118
144
|
}
|
119
145
|
|
120
146
|
std::string_view address;
|
121
147
|
if (!object["address"].get(address)) {
|
122
|
-
rb_hash_aset(hash, sym_address, INT2FIX(parse_address(address
|
148
|
+
rb_hash_aset(hash, sym_address, INT2FIX(parse_address(address)));
|
123
149
|
}
|
124
150
|
|
125
151
|
std::string_view _class;
|
126
152
|
if (!object["class"].get(_class)) {
|
127
|
-
rb_hash_aset(hash, sym_class, INT2FIX(parse_address(_class
|
153
|
+
rb_hash_aset(hash, sym_class, INT2FIX(parse_address(_class)));
|
128
154
|
}
|
129
155
|
|
130
156
|
uint64_t memsize;
|
131
|
-
if (
|
157
|
+
if (object["memsize"].get(memsize)) {
|
158
|
+
// ROOT object
|
159
|
+
rb_hash_aset(hash, sym_memsize, INT2FIX(0));
|
160
|
+
} else {
|
132
161
|
rb_hash_aset(hash, sym_memsize, INT2FIX(memsize));
|
133
162
|
}
|
134
163
|
|
135
164
|
if (type == "IMEMO") {
|
136
165
|
std::string_view imemo_type;
|
137
166
|
if (!object["imemo_type"].get(imemo_type)) {
|
138
|
-
rb_hash_aset(hash, sym_imemo_type,
|
167
|
+
rb_hash_aset(hash, sym_imemo_type, ID2SYM(rb_intern2(imemo_type.data(), imemo_type.size())));
|
139
168
|
}
|
140
169
|
} else if (type == "DATA") {
|
141
170
|
std::string_view _struct;
|
142
171
|
if (!object["struct"].get(_struct)) {
|
143
|
-
rb_hash_aset(hash, sym_struct,
|
172
|
+
rb_hash_aset(hash, sym_struct, ID2SYM(rb_intern2(_struct.data(), _struct.size())));
|
144
173
|
}
|
145
174
|
} else if (type == "STRING") {
|
146
175
|
std::string_view value;
|
@@ -157,7 +186,7 @@ static VALUE make_ruby_object(dom::object object)
|
|
157
186
|
for (dom::element reference_element : reference_elements) {
|
158
187
|
std::string_view reference;
|
159
188
|
if (!reference_element.get(reference)) {
|
160
|
-
rb_ary_push(references, INT2FIX(parse_address(reference
|
189
|
+
rb_ary_push(references, INT2FIX(parse_address(reference)));
|
161
190
|
}
|
162
191
|
}
|
163
192
|
rb_hash_aset(hash, sym_references, references);
|
@@ -178,60 +207,45 @@ static VALUE make_ruby_object(dom::object object)
|
|
178
207
|
return hash;
|
179
208
|
}
|
180
209
|
|
181
|
-
static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE batch_size)
|
210
|
+
static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE since, VALUE batch_size)
|
182
211
|
{
|
183
212
|
Check_Type(arg, T_STRING);
|
184
213
|
Check_Type(batch_size, T_FIXNUM);
|
185
214
|
|
186
|
-
|
187
|
-
{
|
188
|
-
auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
|
189
|
-
if (error != SUCCESS)
|
190
|
-
{
|
191
|
-
rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
|
192
|
-
}
|
215
|
+
dom::parser *parser = get_parser(self);
|
193
216
|
|
194
|
-
|
195
|
-
|
196
|
-
|
217
|
+
try {
|
218
|
+
auto [objects, error] = parser->load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
|
219
|
+
if (error != SUCCESS) {
|
220
|
+
rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
|
197
221
|
}
|
198
222
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
223
|
+
if (RTEST(since)) {
|
224
|
+
Check_Type(since, T_FIXNUM);
|
225
|
+
int64_t generation = FIX2INT(since);
|
226
|
+
for (dom::element object : objects) {
|
227
|
+
int64_t object_generation;
|
228
|
+
if (object["generation"].get(object_generation) || object_generation < generation) {
|
229
|
+
continue;
|
230
|
+
}
|
206
231
|
|
207
|
-
|
208
|
-
{
|
209
|
-
|
210
|
-
|
211
|
-
Check_Type(_generation, T_FIXNUM);
|
212
|
-
int64_t generation = FIX2INT(_generation);
|
213
|
-
|
214
|
-
std::ifstream input(RSTRING_PTR(source_path));
|
215
|
-
std::ofstream output(RSTRING_PTR(destination_path), std::ofstream::out);
|
216
|
-
int count = 0;
|
217
|
-
for (std::string line; getline( input, line );) {
|
218
|
-
int64_t object_generation;
|
219
|
-
dom::element object = parser.parse(line);
|
220
|
-
if (object["generation"].get(object_generation) || object_generation < generation) {
|
221
|
-
continue;
|
222
|
-
}
|
232
|
+
std::string_view file;
|
233
|
+
if (!object["file"].get(file) && file == "__hprof") {
|
234
|
+
continue;
|
235
|
+
}
|
223
236
|
|
224
|
-
|
225
|
-
|
226
|
-
|
237
|
+
rb_yield(make_ruby_object(object));
|
238
|
+
}
|
239
|
+
} else {
|
240
|
+
for (dom::element object : objects) {
|
241
|
+
rb_yield(make_ruby_object(object));
|
242
|
+
}
|
227
243
|
}
|
228
244
|
|
229
|
-
|
230
|
-
|
245
|
+
return Qnil;
|
246
|
+
} catch (simdjson::simdjson_error error) {
|
247
|
+
rb_raise(rb_eHeapProfilerError, "%s", error.what());
|
231
248
|
}
|
232
|
-
|
233
|
-
output.close();
|
234
|
-
return INT2FIX(count);
|
235
249
|
}
|
236
250
|
|
237
251
|
extern "C" {
|
@@ -249,14 +263,14 @@ extern "C" {
|
|
249
263
|
sym_references = ID2SYM(rb_intern("references"));
|
250
264
|
|
251
265
|
VALUE rb_mHeapProfiler = rb_const_get(rb_cObject, rb_intern("HeapProfiler"));
|
252
|
-
VALUE rb_mHeapProfilerNative = rb_const_get(rb_mHeapProfiler, rb_intern("Native"));
|
253
266
|
|
254
267
|
rb_eHeapProfilerError = rb_const_get(rb_mHeapProfiler, rb_intern("Error"));
|
255
268
|
rb_global_variable(&rb_eHeapProfilerError);
|
256
269
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
270
|
+
VALUE rb_mHeapProfilerParserNative = rb_const_get(rb_const_get(rb_mHeapProfiler, rb_intern("Parser")), rb_intern("Native"));
|
271
|
+
rb_define_alloc_func(rb_mHeapProfilerParserNative, parser_allocate);
|
272
|
+
rb_define_method(rb_mHeapProfilerParserNative, "_build_index", reinterpret_cast<VALUE (*)(...)>(rb_heap_build_index), 2);
|
273
|
+
rb_define_method(rb_mHeapProfilerParserNative, "parse_address", reinterpret_cast<VALUE (*)(...)>(rb_heap_parse_address), 1);
|
274
|
+
rb_define_method(rb_mHeapProfilerParserNative, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_heap_load_many), 3);
|
261
275
|
}
|
262
276
|
}
|
data/heap-profiler.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.description = 'Make several heap dumps and summarize allocated, retained memory'
|
12
12
|
spec.homepage = "https://github.com/Shopify/heap-profiler"
|
13
13
|
spec.license = "MIT"
|
14
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
14
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
15
15
|
|
16
16
|
spec.metadata["allowed_push_host"] = "https://rubygems.org/"
|
17
17
|
|
data/lib/heap_profiler/analyzer.rb
CHANGED
@@ -3,55 +3,113 @@
|
|
3
3
|
module HeapProfiler
|
4
4
|
class Analyzer
|
5
5
|
class Dimension
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
attr_reader :stats
|
12
|
-
def initialize(metric)
|
13
|
-
@stats = 0
|
14
|
-
@metric = METRICS.fetch(metric)
|
6
|
+
attr_reader :objects, :memory
|
7
|
+
def initialize
|
8
|
+
@objects = 0
|
9
|
+
@memory = 0
|
15
10
|
end
|
16
11
|
|
17
12
|
def process(_index, object)
|
18
|
-
@
|
13
|
+
@objects += 1
|
14
|
+
unless object[:memsize]
|
15
|
+
p object
|
16
|
+
end
|
17
|
+
@memory += object[:memsize]
|
19
18
|
end
|
20
19
|
|
21
|
-
def
|
20
|
+
def stats(metric)
|
21
|
+
case metric
|
22
|
+
when "objects"
|
23
|
+
objects
|
24
|
+
when "memory"
|
25
|
+
memory
|
26
|
+
else
|
27
|
+
raise "Invalid metric: #{metric.inspect}"
|
28
|
+
end
|
22
29
|
end
|
23
30
|
end
|
24
31
|
|
25
32
|
class GroupedDimension < Dimension
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
class << self
|
34
|
+
def build(grouping)
|
35
|
+
klass = case grouping
|
36
|
+
when "file"
|
37
|
+
FileGroupDimension
|
38
|
+
when "location"
|
39
|
+
LocationGroupDimension
|
40
|
+
when "gem"
|
41
|
+
GemGroupDimension
|
42
|
+
when "class"
|
43
|
+
ClassGroupDimension
|
44
|
+
else
|
45
|
+
raise "Unknown grouping key: #{grouping.inspect}"
|
31
46
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
}.freeze
|
47
|
+
klass.new
|
48
|
+
end
|
49
|
+
end
|
36
50
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
@grouping = GROUPINGS.fetch(grouping)
|
41
|
-
@stats = Hash.new { |h, k| h[k] = 0 }
|
51
|
+
def initialize
|
52
|
+
@objects = Hash.new { |h, k| h[k] = 0 }
|
53
|
+
@memory = Hash.new { |h, k| h[k] = 0 }
|
42
54
|
end
|
43
55
|
|
44
56
|
def process(index, object)
|
45
57
|
if (group = @grouping.call(index, object))
|
46
|
-
@
|
58
|
+
@objects[group] += 1
|
59
|
+
@memory[group] += object[:memsize]
|
47
60
|
end
|
48
61
|
end
|
49
62
|
|
50
|
-
def top_n(max)
|
51
|
-
stats.sort do |a, b|
|
63
|
+
def top_n(metric, max)
|
64
|
+
values = stats(metric).sort do |a, b|
|
65
|
+
b[1] <=> a[1]
|
66
|
+
end
|
67
|
+
top = values.take(max)
|
68
|
+
top.sort! do |a, b|
|
52
69
|
cmp = b[1] <=> a[1]
|
53
70
|
cmp == 0 ? b[0] <=> a[0] : cmp
|
54
|
-
end
|
71
|
+
end
|
72
|
+
top
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
class FileGroupDimension < GroupedDimension
|
77
|
+
def process(_index, object)
|
78
|
+
if (group = object[:file])
|
79
|
+
@objects[group] += 1
|
80
|
+
@memory[group] += object[:memsize]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
class LocationGroupDimension < GroupedDimension
|
86
|
+
def process(_index, object)
|
87
|
+
file = object[:file]
|
88
|
+
line = object[:line]
|
89
|
+
|
90
|
+
if file && line
|
91
|
+
group = "#{file}:#{line}"
|
92
|
+
@objects[group] += 1
|
93
|
+
@memory[group] += object[:memsize]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class GemGroupDimension < GroupedDimension
|
99
|
+
def process(index, object)
|
100
|
+
if (group = index.guess_gem(object))
|
101
|
+
@objects[group] += 1
|
102
|
+
@memory[group] += object[:memsize]
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
class ClassGroupDimension < GroupedDimension
|
108
|
+
def process(index, object)
|
109
|
+
if (group = index.guess_class(object))
|
110
|
+
@objects[group] += 1
|
111
|
+
@memory[group] += object[:memsize]
|
112
|
+
end
|
55
113
|
end
|
56
114
|
end
|
57
115
|
|
@@ -103,7 +161,7 @@ module HeapProfiler
|
|
103
161
|
end
|
104
162
|
|
105
163
|
def process(_index, object)
|
106
|
-
return unless object[:type] ==
|
164
|
+
return unless object[:type] == :STRING
|
107
165
|
value = object[:value]
|
108
166
|
return unless value # broken strings etc
|
109
167
|
@stats[value].process(object)
|
@@ -112,10 +170,14 @@ module HeapProfiler
|
|
112
170
|
def top_n(max)
|
113
171
|
values = @stats.values
|
114
172
|
values.sort! do |a, b|
|
173
|
+
b.count <=> a.count
|
174
|
+
end
|
175
|
+
top = values.take(max)
|
176
|
+
top.sort! do |a, b|
|
115
177
|
cmp = b.count <=> a.count
|
116
178
|
cmp == 0 ? b.value <=> a.value : cmp
|
117
179
|
end
|
118
|
-
|
180
|
+
top
|
119
181
|
end
|
120
182
|
end
|
121
183
|
|
@@ -130,9 +192,9 @@ module HeapProfiler
|
|
130
192
|
if metric == "strings"
|
131
193
|
dimensions["strings"] = StringDimension.new
|
132
194
|
else
|
133
|
-
dimensions["
|
195
|
+
dimensions["total"] = Dimension.new
|
134
196
|
groupings.each do |grouping|
|
135
|
-
dimensions[
|
197
|
+
dimensions[grouping] = GroupedDimension.build(grouping)
|
136
198
|
end
|
137
199
|
end
|
138
200
|
end
|