heap-profiler 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  require 'bundler/setup'
4
4
  require "heap-profiler"
5
5
  require 'fileutils'
6
+ require 'date'
6
7
 
7
8
  class SomeCustomStuff
8
9
  end
@@ -22,6 +23,7 @@ else
22
23
  +"I am retained",
23
24
  +"I am retained too",
24
25
  SomeCustomStuff.new,
26
+ Date.today,
25
27
  ]
26
28
  "I am a very very long string " * 4
27
29
  [
@@ -2,5 +2,4 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require "heap_profiler/full"
5
-
6
5
  exit(HeapProfiler::CLI.new(ARGV).run)
@@ -8,7 +8,37 @@ static VALUE rb_eHeapProfilerError, sym_type, sym_class, sym_address, sym_value,
8
8
  sym_memsize, sym_imemo_type, sym_struct, sym_file, sym_line, sym_shared,
9
9
  sym_references;
10
10
 
11
- static dom::parser parser;
11
+ typedef struct {
12
+ dom::parser *parser;
13
+ } parser_t;
14
+
15
+ static void Parser_delete(void *ptr) {
16
+ parser_t *data = (parser_t*) ptr;
17
+ delete data->parser;
18
+ }
19
+
20
+ static size_t Parser_memsize(const void *parser) {
21
+ return sizeof(dom::parser); // TODO: low priority, figure the real size, e.g. internal buffers etc.
22
+ }
23
+
24
+ static const rb_data_type_t parser_data_type = {
25
+ "Parser",
26
+ { 0, Parser_delete, Parser_memsize, },
27
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
28
+ };
29
+
30
+ static VALUE parser_allocate(VALUE klass) {
31
+ parser_t *data;
32
+ VALUE obj = TypedData_Make_Struct(klass, parser_t, &parser_data_type, data);
33
+ data->parser = new dom::parser;
34
+ return obj;
35
+ }
36
+
37
+ static inline dom::parser * get_parser(VALUE self) {
38
+ parser_t *data;
39
+ TypedData_Get_Struct(self, parser_t, &parser_data_type, data);
40
+ return data->parser;
41
+ }
12
42
 
13
43
  const uint64_t digittoval[256] = {
14
44
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -29,41 +59,40 @@ const uint64_t digittoval[256] = {
29
59
  // Inspired by https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
30
60
  // Ruby addresses in heap dumps are hexadecimal strings "0x000000000000".."0xffffffffffff".
31
61
  // The format being fairly stable allows for faster parsing. It should be equivalent to String#to_i(16).
32
- static inline uint64_t parse_address(const char * address) {
33
- return (
34
- digittoval[address[ 2]] << 44 |
35
- digittoval[address[ 3]] << 40 |
36
- digittoval[address[ 4]] << 36 |
37
- digittoval[address[ 5]] << 32 |
38
- digittoval[address[ 6]] << 28 |
39
- digittoval[address[ 7]] << 24 |
40
- digittoval[address[ 8]] << 20 |
41
- digittoval[address[ 9]] << 16 |
42
- digittoval[address[10]] << 12 |
43
- digittoval[address[11]] << 8 |
44
- digittoval[address[12]] << 4 |
45
- digittoval[address[13]]
46
- );
62
+ static inline uint64_t parse_address(const char * address, const long size) {
63
+ assert(address[0] == '0');
64
+ assert(address[1] == 'x');
65
+
66
+ uint64_t value = 0;
67
+ for (int index = 2; index < size; index++) {
68
+ value <<= 4;
69
+ value |= digittoval[address[index]];
70
+ }
71
+ return value;
72
+ }
73
+
74
+ static inline int64_t parse_address(std::string_view address) {
75
+ return parse_address(address.data(), address.size());
47
76
  }
48
77
 
49
- static inline int64_t parse_address(dom::element element) {
78
+ static inline int64_t parse_dom_address(dom::element element) {
50
79
  std::string_view address;
51
80
  if (element.get(address)) {
52
81
  return 0; // ROOT object
53
82
  }
54
- assert(address.size() == 14);
55
- return parse_address(address.data());
83
+ return parse_address(address);
56
84
  }
57
85
 
58
86
  static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
59
87
  Check_Type(path, T_STRING);
60
88
  Check_Type(batch_size, T_FIXNUM);
89
+ dom::parser *parser = get_parser(self);
61
90
 
62
91
  VALUE string_index = rb_hash_new();
63
92
  VALUE class_index = rb_hash_new();
64
93
 
65
94
  try {
66
- auto [objects, error] = parser.load_many(RSTRING_PTR(path), FIX2INT(batch_size));
95
+ auto [objects, error] = parser->load_many(RSTRING_PTR(path), FIX2INT(batch_size));
67
96
  if (error != SUCCESS) {
68
97
  rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
69
98
  }
@@ -77,22 +106,20 @@ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
77
106
  if (type == "STRING") {
78
107
  std::string_view value;
79
108
  if (!object["value"].get(value)) {
80
- VALUE address = INT2FIX(parse_address(object["address"]));
109
+ VALUE address = INT2FIX(parse_dom_address(object["address"]));
81
110
  VALUE string = rb_utf8_str_new(value.data(), value.size());
82
111
  rb_hash_aset(string_index, address, string);
83
112
  }
84
113
  } else if (type == "CLASS" || type == "MODULE") {
85
114
  std::string_view name;
86
115
  if (!object["name"].get(name)) {
87
- VALUE address = INT2FIX(parse_address(object["address"]));
116
+ VALUE address = INT2FIX(parse_dom_address(object["address"]));
88
117
  VALUE class_name = rb_utf8_str_new(name.data(), name.size());
89
118
  rb_hash_aset(class_index, address, class_name);
90
119
  }
91
120
  }
92
121
  }
93
- }
94
- catch (simdjson::simdjson_error error)
95
- {
122
+ } catch (simdjson::simdjson_error error) {
96
123
  rb_raise(rb_eHeapProfilerError, "exc: %s", error.what());
97
124
  }
98
125
 
@@ -104,8 +131,7 @@ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
104
131
 
105
132
  static VALUE rb_heap_parse_address(VALUE self, VALUE address) {
106
133
  Check_Type(address, T_STRING);
107
- assert(RSTRING_LEN(address) == 14);
108
- return INT2FIX(parse_address(RSTRING_PTR(address)));
134
+ return INT2FIX(parse_address(RSTRING_PTR(address), RSTRING_LEN(address)));
109
135
  }
110
136
 
111
137
  static VALUE make_ruby_object(dom::object object)
@@ -114,33 +140,36 @@ static VALUE make_ruby_object(dom::object object)
114
140
 
115
141
  std::string_view type;
116
142
  if (!object["type"].get(type)) {
117
- rb_hash_aset(hash, sym_type, rb_utf8_str_new(type.data(), type.size()));
143
+ rb_hash_aset(hash, sym_type, ID2SYM(rb_intern2(type.data(), type.size())));
118
144
  }
119
145
 
120
146
  std::string_view address;
121
147
  if (!object["address"].get(address)) {
122
- rb_hash_aset(hash, sym_address, INT2FIX(parse_address(address.data())));
148
+ rb_hash_aset(hash, sym_address, INT2FIX(parse_address(address)));
123
149
  }
124
150
 
125
151
  std::string_view _class;
126
152
  if (!object["class"].get(_class)) {
127
- rb_hash_aset(hash, sym_class, INT2FIX(parse_address(_class.data())));
153
+ rb_hash_aset(hash, sym_class, INT2FIX(parse_address(_class)));
128
154
  }
129
155
 
130
156
  uint64_t memsize;
131
- if (!object["memsize"].get(memsize)) {
157
+ if (object["memsize"].get(memsize)) {
158
+ // ROOT object
159
+ rb_hash_aset(hash, sym_memsize, INT2FIX(0));
160
+ } else {
132
161
  rb_hash_aset(hash, sym_memsize, INT2FIX(memsize));
133
162
  }
134
163
 
135
164
  if (type == "IMEMO") {
136
165
  std::string_view imemo_type;
137
166
  if (!object["imemo_type"].get(imemo_type)) {
138
- rb_hash_aset(hash, sym_imemo_type, rb_utf8_str_new(imemo_type.data(), imemo_type.size()));
167
+ rb_hash_aset(hash, sym_imemo_type, ID2SYM(rb_intern2(imemo_type.data(), imemo_type.size())));
139
168
  }
140
169
  } else if (type == "DATA") {
141
170
  std::string_view _struct;
142
171
  if (!object["struct"].get(_struct)) {
143
- rb_hash_aset(hash, sym_struct, rb_utf8_str_new(_struct.data(), _struct.size()));
172
+ rb_hash_aset(hash, sym_struct, ID2SYM(rb_intern2(_struct.data(), _struct.size())));
144
173
  }
145
174
  } else if (type == "STRING") {
146
175
  std::string_view value;
@@ -157,7 +186,7 @@ static VALUE make_ruby_object(dom::object object)
157
186
  for (dom::element reference_element : reference_elements) {
158
187
  std::string_view reference;
159
188
  if (!reference_element.get(reference)) {
160
- rb_ary_push(references, INT2FIX(parse_address(reference.data())));
189
+ rb_ary_push(references, INT2FIX(parse_address(reference)));
161
190
  }
162
191
  }
163
192
  rb_hash_aset(hash, sym_references, references);
@@ -178,60 +207,45 @@ static VALUE make_ruby_object(dom::object object)
178
207
  return hash;
179
208
  }
180
209
 
181
- static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE batch_size)
210
+ static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE since, VALUE batch_size)
182
211
  {
183
212
  Check_Type(arg, T_STRING);
184
213
  Check_Type(batch_size, T_FIXNUM);
185
214
 
186
- try
187
- {
188
- auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
189
- if (error != SUCCESS)
190
- {
191
- rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
192
- }
215
+ dom::parser *parser = get_parser(self);
193
216
 
194
- for (dom::element doc : docs)
195
- {
196
- rb_yield(make_ruby_object(doc));
217
+ try {
218
+ auto [objects, error] = parser->load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
219
+ if (error != SUCCESS) {
220
+ rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
197
221
  }
198
222
 
199
- return Qnil;
200
- }
201
- catch (simdjson::simdjson_error error)
202
- {
203
- rb_raise(rb_eHeapProfilerError, "%s", error.what());
204
- }
205
- }
223
+ if (RTEST(since)) {
224
+ Check_Type(since, T_FIXNUM);
225
+ int64_t generation = FIX2INT(since);
226
+ for (dom::element object : objects) {
227
+ int64_t object_generation;
228
+ if (object["generation"].get(object_generation) || object_generation < generation) {
229
+ continue;
230
+ }
206
231
 
207
- static VALUE rb_heap_filter(VALUE self, VALUE source_path, VALUE destination_path, VALUE _generation)
208
- {
209
- Check_Type(source_path, T_STRING);
210
- Check_Type(destination_path, T_STRING);
211
- Check_Type(_generation, T_FIXNUM);
212
- int64_t generation = FIX2INT(_generation);
213
-
214
- std::ifstream input(RSTRING_PTR(source_path));
215
- std::ofstream output(RSTRING_PTR(destination_path), std::ofstream::out);
216
- int count = 0;
217
- for (std::string line; getline( input, line );) {
218
- int64_t object_generation;
219
- dom::element object = parser.parse(line);
220
- if (object["generation"].get(object_generation) || object_generation < generation) {
221
- continue;
222
- }
232
+ std::string_view file;
233
+ if (!object["file"].get(file) && file == "__hprof") {
234
+ continue;
235
+ }
223
236
 
224
- std::string_view file;
225
- if (!object["file"].get(file) && file == "__hprof") {
226
- continue;
237
+ rb_yield(make_ruby_object(object));
238
+ }
239
+ } else {
240
+ for (dom::element object : objects) {
241
+ rb_yield(make_ruby_object(object));
242
+ }
227
243
  }
228
244
 
229
- count += 1;
230
- output << line << std::endl;
245
+ return Qnil;
246
+ } catch (simdjson::simdjson_error error) {
247
+ rb_raise(rb_eHeapProfilerError, "%s", error.what());
231
248
  }
232
-
233
- output.close();
234
- return INT2FIX(count);
235
249
  }
236
250
 
237
251
  extern "C" {
@@ -249,14 +263,14 @@ extern "C" {
249
263
  sym_references = ID2SYM(rb_intern("references"));
250
264
 
251
265
  VALUE rb_mHeapProfiler = rb_const_get(rb_cObject, rb_intern("HeapProfiler"));
252
- VALUE rb_mHeapProfilerNative = rb_const_get(rb_mHeapProfiler, rb_intern("Native"));
253
266
 
254
267
  rb_eHeapProfilerError = rb_const_get(rb_mHeapProfiler, rb_intern("Error"));
255
268
  rb_global_variable(&rb_eHeapProfilerError);
256
269
 
257
- rb_define_module_function(rb_mHeapProfilerNative, "_build_index", reinterpret_cast<VALUE (*)(...)>(rb_heap_build_index), 2);
258
- rb_define_module_function(rb_mHeapProfilerNative, "parse_address", reinterpret_cast<VALUE (*)(...)>(rb_heap_parse_address), 1);
259
- rb_define_module_function(rb_mHeapProfilerNative, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_heap_load_many), 2);
260
- rb_define_module_function(rb_mHeapProfilerNative, "_filter_heap", reinterpret_cast<VALUE (*)(...)>(rb_heap_filter), 3);
270
+ VALUE rb_mHeapProfilerParserNative = rb_const_get(rb_const_get(rb_mHeapProfiler, rb_intern("Parser")), rb_intern("Native"));
271
+ rb_define_alloc_func(rb_mHeapProfilerParserNative, parser_allocate);
272
+ rb_define_method(rb_mHeapProfilerParserNative, "_build_index", reinterpret_cast<VALUE (*)(...)>(rb_heap_build_index), 2);
273
+ rb_define_method(rb_mHeapProfilerParserNative, "parse_address", reinterpret_cast<VALUE (*)(...)>(rb_heap_parse_address), 1);
274
+ rb_define_method(rb_mHeapProfilerParserNative, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_heap_load_many), 3);
261
275
  }
262
276
  }
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
  spec.description = 'Make several heap dumps and summarize allocated, retained memory'
12
12
  spec.homepage = "https://github.com/Shopify/heap-profiler"
13
13
  spec.license = "MIT"
14
- spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")
14
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
15
15
 
16
16
  spec.metadata["allowed_push_host"] = "https://rubygems.org/"
17
17
 
@@ -3,55 +3,113 @@
3
3
  module HeapProfiler
4
4
  class Analyzer
5
5
  class Dimension
6
- METRICS = {
7
- "objects" => -> (_object) { 1 },
8
- "memory" => -> (object) { object[:memsize].to_i },
9
- }.freeze
10
-
11
- attr_reader :stats
12
- def initialize(metric)
13
- @stats = 0
14
- @metric = METRICS.fetch(metric)
6
+ attr_reader :objects, :memory
7
+ def initialize
8
+ @objects = 0
9
+ @memory = 0
15
10
  end
16
11
 
17
12
  def process(_index, object)
18
- @stats += @metric.call(object)
13
+ @objects += 1
14
+ unless object[:memsize]
15
+ p object
16
+ end
17
+ @memory += object[:memsize]
19
18
  end
20
19
 
21
- def sort!
20
+ def stats(metric)
21
+ case metric
22
+ when "objects"
23
+ objects
24
+ when "memory"
25
+ memory
26
+ else
27
+ raise "Invalid metric: #{metric.inspect}"
28
+ end
22
29
  end
23
30
  end
24
31
 
25
32
  class GroupedDimension < Dimension
26
- GROUPINGS = {
27
- "file" => -> (_index, object) { object[:file] },
28
- "location" => -> (_index, object) do
29
- if (file = object[:file]) && (line = object[:line])
30
- "#{file}:#{line}"
33
+ class << self
34
+ def build(grouping)
35
+ klass = case grouping
36
+ when "file"
37
+ FileGroupDimension
38
+ when "location"
39
+ LocationGroupDimension
40
+ when "gem"
41
+ GemGroupDimension
42
+ when "class"
43
+ ClassGroupDimension
44
+ else
45
+ raise "Unknown grouping key: #{grouping.inspect}"
31
46
  end
32
- end,
33
- "gem" => -> (index, object) { index.guess_gem(object[:file]) },
34
- "class" => -> (index, object) { index.guess_class(object) },
35
- }.freeze
47
+ klass.new
48
+ end
49
+ end
36
50
 
37
- attr_reader :stats
38
- def initialize(metric, grouping)
39
- super(metric)
40
- @grouping = GROUPINGS.fetch(grouping)
41
- @stats = Hash.new { |h, k| h[k] = 0 }
51
+ def initialize
52
+ @objects = Hash.new { |h, k| h[k] = 0 }
53
+ @memory = Hash.new { |h, k| h[k] = 0 }
42
54
  end
43
55
 
44
56
  def process(index, object)
45
57
  if (group = @grouping.call(index, object))
46
- @stats[group] += @metric.call(object)
58
+ @objects[group] += 1
59
+ @memory[group] += object[:memsize]
47
60
  end
48
61
  end
49
62
 
50
- def top_n(max)
51
- stats.sort do |a, b|
63
+ def top_n(metric, max)
64
+ values = stats(metric).sort do |a, b|
65
+ b[1] <=> a[1]
66
+ end
67
+ top = values.take(max)
68
+ top.sort! do |a, b|
52
69
  cmp = b[1] <=> a[1]
53
70
  cmp == 0 ? b[0] <=> a[0] : cmp
54
- end.take(max)
71
+ end
72
+ top
73
+ end
74
+ end
75
+
76
# Groups objects by their :file; objects without a :file are ignored.
class FileGroupDimension < GroupedDimension
  def process(_index, object)
    file = object[:file]
    return unless file

    @objects[file] += 1
    @memory[file] += object[:memsize]
  end
end
84
+
85
# Groups objects by "file:line"; objects missing either :file or :line are ignored.
class LocationGroupDimension < GroupedDimension
  def process(_index, object)
    file = object[:file]
    line = object[:line]
    return unless file && line

    location = "#{file}:#{line}"
    @objects[location] += 1
    @memory[location] += object[:memsize]
  end
end
97
+
98
# Groups objects by whatever index.guess_gem returns for them; objects for which it
# returns nil/false are ignored.
class GemGroupDimension < GroupedDimension
  def process(index, object)
    gem_name = index.guess_gem(object)
    return unless gem_name

    @objects[gem_name] += 1
    @memory[gem_name] += object[:memsize]
  end
end
106
+
107
# Groups objects by whatever index.guess_class returns for them; objects for which it
# returns nil/false are ignored.
class ClassGroupDimension < GroupedDimension
  def process(index, object)
    class_name = index.guess_class(object)
    return unless class_name

    @objects[class_name] += 1
    @memory[class_name] += object[:memsize]
  end
end
57
115
 
@@ -103,7 +161,7 @@ module HeapProfiler
103
161
  end
104
162
 
105
163
  def process(_index, object)
106
- return unless object[:type] == "STRING"
164
+ return unless object[:type] == :STRING
107
165
  value = object[:value]
108
166
  return unless value # broken strings etc
109
167
  @stats[value].process(object)
@@ -112,10 +170,14 @@ module HeapProfiler
112
170
  def top_n(max)
113
171
  values = @stats.values
114
172
  values.sort! do |a, b|
173
+ b.count <=> a.count
174
+ end
175
+ top = values.take(max)
176
+ top.sort! do |a, b|
115
177
  cmp = b.count <=> a.count
116
178
  cmp == 0 ? b.value <=> a.value : cmp
117
179
  end
118
- values.take(max)
180
+ top
119
181
  end
120
182
  end
121
183
 
@@ -130,9 +192,9 @@ module HeapProfiler
130
192
  if metric == "strings"
131
193
  dimensions["strings"] = StringDimension.new
132
194
  else
133
- dimensions["total_#{metric}"] = Dimension.new(metric)
195
+ dimensions["total"] = Dimension.new
134
196
  groupings.each do |grouping|
135
- dimensions["#{metric}_by_#{grouping}"] = GroupedDimension.new(metric, grouping)
197
+ dimensions[grouping] = GroupedDimension.build(grouping)
136
198
  end
137
199
  end
138
200
  end