heap-profiler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
# frozen_string_literal: true

require "bundler/gem_tasks"
require "rake/testtask"
require "rake/extensiontask"

# Native extension task: both directories point at the heap_profiler folders.
Rake::ExtensionTask.new("heap_profiler") do |extension|
  extension.ext_dir = "ext/heap_profiler"
  extension.lib_dir = "lib/heap_profiler"
end

# Test task: adds test/ and lib/ to the load path and picks up every
# file matching test/**/*_test.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# The default task compiles first, then runs the tests.
task(default: [:compile, :test])
data/TODO.md ADDED
@@ -0,0 +1,9 @@
1
+ ### Important
2
+
3
+ - Test with other Ruby versions.
4
+
5
+ ### Explore
6
+
7
+ - Performance optimization.
8
+ - Would filtering on the `generation` field be faster than the address extraction?
9
+ - Detect object growth?
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

require "bundler/setup"
require "benchmark/ips"
require "heap_profiler/full"

# Benchmarks the Ruby and the native address parser on the same sample input.
sample_address = "0x7f921e88a8f8"

Benchmark.ips do |bench|
  bench.report("ruby") { HeapProfiler::Native.ruby_parse_address(sample_address) }
  bench.report("cpp") { HeapProfiler::Native.parse_address(sample_address) }
  bench.compare!
end
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

require "bundler/setup"
require "benchmark/ips"
require "heap_profiler/full"

# Heap dump fixture used as input, resolved relative to this script's directory.
FIXTURE_PATH = File.expand_path(
  "../test/fixtures/diffed-heap/allocated.heap",
  File.dirname(__FILE__),
)

# Benchmarks the Ruby and the native index builders on the same fixture.
Benchmark.ips do |bench|
  bench.report("ruby") { HeapProfiler::Native.ruby_build_index(FIXTURE_PATH) }
  bench.report("cpp") { HeapProfiler::Native.build_index(FIXTURE_PATH) }
  bench.compare!
end
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Interactive console with the gem already loaded.

require "bundler/setup"
require "heap-profiler"

# Fixtures or initialization code can be added here to make experimenting
# with the gem easier. A different console can be used as well, e.g. pry
# (remember to add it to the Gemfile first):
#
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
@@ -0,0 +1,34 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Generates a HeapProfiler report into a directory.
#
# Usage: <this script> DIRECTORY [--empty]
#
# DIRECTORY is created if needed and its current entries are removed first.
# With `--empty` the profiled block does nothing; otherwise the block
# allocates, retains and releases a fixed set of objects.
require "bundler/setup"
require "heap-profiler"
require "fileutils"

class SomeCustomStuff
end

# Allocated before the report starts; the reference is dropped inside the
# profiled block below.
$freed_objects = [+"i am free", +"i am free too"]

dir = ARGV.first
# Fail with a readable message instead of a TypeError from mkdir_p(nil).
abort("usage: #{File.basename($PROGRAM_NAME)} DIRECTORY [--empty]") unless dir

FileUtils.mkdir_p(dir)
FileUtils.rm_rf(Dir[File.join(dir, "*")])

if ARGV[1] == "--empty"
  HeapProfiler.report(dir) {}
else
  # The statements below are intentionally left as-is: each one exists only
  # for the objects it allocates while the report is being recorded.
  HeapProfiler.report(dir) do
    $retained_objects = [
      +"I am retained",
      +"I am retained",
      +"I am retained too",
      SomeCustomStuff.new,
    ]
    "I am a very very long string " * 4
    [
      +"I am allocated",
      +"I am allocated too",
    ]
    $freed_objects = nil
    Struct.new("foo".to_sym, :bar)
  end
end
@@ -0,0 +1,29 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rubocop' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Point Bundler at the project Gemfile unless the caller already chose one.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile", Pathname.new(__FILE__).realpath)

bundle_binstub = File.expand_path("../bundle", __FILE__)

if File.file?(bundle_binstub)
  # Only a Bundler-generated bin/bundle is loaded; anything else aborts.
  binstub_header = File.read(bundle_binstub, 300)
  unless /This file was generated by Bundler/.match?(binstub_header)
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
  load(bundle_binstub)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rubocop", "rubocop"))
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development setup script: installs the bundle.

# Stop on errors, on unset variables and on failures inside pipelines.
set -o errexit -o nounset -o pipefail
IFS=$'\n\t'
# From here on, echo script lines and expanded commands as they run.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,9 @@
1
#!/bin/bash
#
# Test runner.
#   no arguments : requires every ./test/**/*_test.rb file
#   FILE [ARGS]  : requires only FILE (a leading "test/" is stripped, the
#                  file is then resolved through the -I"test" load path);
#                  all arguments are still visible to the test process in ARGV

if [[ $# -eq 0 ]]; then
  exec ruby -I"test" -w -e 'Dir.glob("./test/**/*_test.rb").each { |f| require f }' -- "$@"
else
  # The path is read from ARGV rather than interpolated into the Ruby
  # source, so a path containing quotes cannot break or alter the script.
  exec ruby -I"test" -w -e 'require ARGV.first.delete_prefix("test/")' -- "$@"
fi
9
+
data/dev.yml ADDED
@@ -0,0 +1,20 @@
1
+ name: heap-profiler
2
+
3
+ type: ruby
4
+
5
+ up:
6
+ - ruby: 2.7.1
7
+ - bundler
8
+
9
+ commands:
10
+ console:
11
+ desc: 'start a console'
12
+ run: bin/console
13
+ run:
14
+ desc: 'start the application'
15
+ run: bin/run
16
+ test:
17
+ syntax:
18
+ argument: file
19
+ optional: args...
20
+ run: bin/testunit
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

require "heap_profiler/full"

# Run the CLI with the process arguments; its return value is passed to exit.
cli = HeapProfiler::CLI.new(ARGV)
exit(cli.run)
@@ -0,0 +1,7 @@
1
# frozen_string_literal: true

require "mkmf"

# Extra C++ compiler flags, appended to the flags that are already set.
extra_cxxflags = ["-O3", "-std=c++1z", "-Wno-register"]
$CXXFLAGS += " #{extra_cxxflags.join(" ")} "

create_makefile("heap_profiler/heap_profiler")
@@ -0,0 +1,262 @@
1
+ #include "ruby.h"
2
+ #include "simdjson.h"
3
+ #include <fstream>
4
+
5
+ using namespace simdjson;
6
+
7
+ static VALUE rb_eHeapProfilerError, sym_type, sym_class, sym_address, sym_value,
8
+ sym_memsize, sym_imemo_type, sym_struct, sym_file, sym_line, sym_shared,
9
+ sym_references;
10
+
11
+ static dom::parser parser;
12
+
13
// Lookup table mapping a byte to its hexadecimal digit value.
// '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, every other byte maps to 0.
const uint64_t digittoval[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,
    9,  0,  0,  0,  0,  0,  0,  0,  10, 11, 12, 13, 14, 15, 0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  10, 11, 12, 13, 14, 15, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0};

// Inspired by https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
// Ruby addresses in heap dumps are hexadecimal strings "0x000000000000".."0xffffffffffff".
// The format being fairly stable allows for faster parsing. It should be equivalent to String#to_i(16).
//
// `address` must point to at least 14 readable bytes: a two byte prefix
// (skipped, not validated) followed by 12 hexadecimal digits. Bytes that are
// not hexadecimal digits contribute 0.
static inline uint64_t parse_address(const char * address) {
    // Index the table through unsigned bytes: plain `char` may be signed, in
    // which case a byte >= 0x80 would become a negative table index.
    const unsigned char *digits = reinterpret_cast<const unsigned char *>(address) + 2;
    return (
        digittoval[digits[ 0]] << 44 |
        digittoval[digits[ 1]] << 40 |
        digittoval[digits[ 2]] << 36 |
        digittoval[digits[ 3]] << 32 |
        digittoval[digits[ 4]] << 28 |
        digittoval[digits[ 5]] << 24 |
        digittoval[digits[ 6]] << 20 |
        digittoval[digits[ 7]] << 16 |
        digittoval[digits[ 8]] << 12 |
        digittoval[digits[ 9]] <<  8 |
        digittoval[digits[10]] <<  4 |
        digittoval[digits[11]]
    );
}
48
+
49
+ static inline int64_t parse_address(dom::element element) {
50
+ std::string_view address;
51
+ if (element.get(address)) {
52
+ return 0; // ROOT object
53
+ }
54
+ assert(address.size() == 14);
55
+ return parse_address(address.data());
56
+ }
57
+
58
+ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
59
+ Check_Type(path, T_STRING);
60
+ Check_Type(batch_size, T_FIXNUM);
61
+
62
+ VALUE string_index = rb_hash_new();
63
+ VALUE class_index = rb_hash_new();
64
+
65
+ try {
66
+ auto [objects, error] = parser.load_many(RSTRING_PTR(path), FIX2INT(batch_size));
67
+ if (error != SUCCESS) {
68
+ rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
69
+ }
70
+
71
+ for (dom::object object : objects) {
72
+ std::string_view type;
73
+ if (object["type"].get(type)) {
74
+ continue;
75
+ }
76
+
77
+ if (type == "STRING") {
78
+ std::string_view value;
79
+ if (!object["value"].get(value)) {
80
+ VALUE address = INT2FIX(parse_address(object["address"]));
81
+ VALUE string = rb_utf8_str_new(value.data(), value.size());
82
+ rb_hash_aset(string_index, address, string);
83
+ }
84
+ } else if (type == "CLASS" || type == "MODULE") {
85
+ std::string_view name;
86
+ if (!object["name"].get(name)) {
87
+ VALUE address = INT2FIX(parse_address(object["address"]));
88
+ VALUE class_name = rb_utf8_str_new(name.data(), name.size());
89
+ rb_hash_aset(class_index, address, class_name);
90
+ }
91
+ }
92
+ }
93
+ }
94
+ catch (simdjson::simdjson_error error)
95
+ {
96
+ rb_raise(rb_eHeapProfilerError, "exc: %s", error.what());
97
+ }
98
+
99
+ VALUE return_value = rb_ary_new();
100
+ rb_ary_push(return_value, class_index);
101
+ rb_ary_push(return_value, string_index);
102
+ return return_value;
103
+ }
104
+
105
+ static VALUE rb_heap_parse_address(VALUE self, VALUE address) {
106
+ Check_Type(address, T_STRING);
107
+ assert(RSTRING_LEN(address) == 14);
108
+ return INT2FIX(parse_address(RSTRING_PTR(address)));
109
+ }
110
+
111
+ static VALUE make_ruby_object(dom::object object)
112
+ {
113
+ VALUE hash = rb_hash_new();
114
+
115
+ std::string_view type;
116
+ if (!object["type"].get(type)) {
117
+ rb_hash_aset(hash, sym_type, rb_utf8_str_new(type.data(), type.size()));
118
+ }
119
+
120
+ std::string_view address;
121
+ if (!object["address"].get(address)) {
122
+ rb_hash_aset(hash, sym_address, INT2FIX(parse_address(address.data())));
123
+ }
124
+
125
+ std::string_view _class;
126
+ if (!object["class"].get(_class)) {
127
+ rb_hash_aset(hash, sym_class, INT2FIX(parse_address(_class.data())));
128
+ }
129
+
130
+ uint64_t memsize;
131
+ if (!object["memsize"].get(memsize)) {
132
+ rb_hash_aset(hash, sym_memsize, INT2FIX(memsize));
133
+ }
134
+
135
+ if (type == "IMEMO") {
136
+ std::string_view imemo_type;
137
+ if (!object["imemo_type"].get(imemo_type)) {
138
+ rb_hash_aset(hash, sym_imemo_type, rb_utf8_str_new(imemo_type.data(), imemo_type.size()));
139
+ }
140
+ } else if (type == "DATA") {
141
+ std::string_view _struct;
142
+ if (!object["struct"].get(_struct)) {
143
+ rb_hash_aset(hash, sym_struct, rb_utf8_str_new(_struct.data(), _struct.size()));
144
+ }
145
+ } else if (type == "STRING") {
146
+ std::string_view value;
147
+ if (!object["value"].get(value)) {
148
+ rb_hash_aset(hash, sym_value, rb_utf8_str_new(value.data(), value.size()));
149
+ }
150
+
151
+ bool shared;
152
+ if (!object["shared"].get(shared)) {
153
+ rb_hash_aset(hash, sym_shared, shared ? Qtrue : Qnil);
154
+ if (shared) {
155
+ VALUE references = rb_ary_new();
156
+ dom::array reference_elements(object["references"]);
157
+ for (dom::element reference_element : reference_elements) {
158
+ std::string_view reference;
159
+ if (!reference_element.get(reference)) {
160
+ rb_ary_push(references, INT2FIX(parse_address(reference.data())));
161
+ }
162
+ }
163
+ rb_hash_aset(hash, sym_references, references);
164
+ }
165
+ }
166
+ }
167
+
168
+ std::string_view file;
169
+ if (!object["file"].get(file)) {
170
+ rb_hash_aset(hash, sym_file, rb_utf8_str_new(file.data(), file.size()));
171
+ }
172
+
173
+ uint64_t line;
174
+ if (!object["line"].get(line)) {
175
+ rb_hash_aset(hash, sym_line, INT2FIX(line));
176
+ }
177
+
178
+ return hash;
179
+ }
180
+
181
+ static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE batch_size)
182
+ {
183
+ Check_Type(arg, T_STRING);
184
+ Check_Type(batch_size, T_FIXNUM);
185
+
186
+ try
187
+ {
188
+ auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
189
+ if (error != SUCCESS)
190
+ {
191
+ rb_raise(rb_eHeapProfilerError, "%s", error_message(error));
192
+ }
193
+
194
+ for (dom::element doc : docs)
195
+ {
196
+ rb_yield(make_ruby_object(doc));
197
+ }
198
+
199
+ return Qnil;
200
+ }
201
+ catch (simdjson::simdjson_error error)
202
+ {
203
+ rb_raise(rb_eHeapProfilerError, "%s", error.what());
204
+ }
205
+ }
206
+
207
+ static VALUE rb_heap_filter(VALUE self, VALUE source_path, VALUE destination_path, VALUE _generation)
208
+ {
209
+ Check_Type(source_path, T_STRING);
210
+ Check_Type(destination_path, T_STRING);
211
+ Check_Type(_generation, T_FIXNUM);
212
+ int64_t generation = FIX2INT(_generation);
213
+
214
+ std::ifstream input(RSTRING_PTR(source_path));
215
+ std::ofstream output(RSTRING_PTR(destination_path), std::ofstream::out);
216
+ int count = 0;
217
+ for (std::string line; getline( input, line );) {
218
+ int64_t object_generation;
219
+ dom::element object = parser.parse(line);
220
+ if (object["generation"].get(object_generation) || object_generation < generation) {
221
+ continue;
222
+ }
223
+
224
+ std::string_view file;
225
+ if (!object["file"].get(file) && file == "__hprof") {
226
+ continue;
227
+ }
228
+
229
+ count += 1;
230
+ output << line << std::endl;
231
+ }
232
+
233
+ output.close();
234
+ return INT2FIX(count);
235
+ }
236
+
237
+ extern "C" {
238
+ void Init_heap_profiler(void) {
239
+ sym_type = ID2SYM(rb_intern("type"));
240
+ sym_class = ID2SYM(rb_intern("class"));
241
+ sym_address = ID2SYM(rb_intern("address"));
242
+ sym_value = ID2SYM(rb_intern("value"));
243
+ sym_memsize = ID2SYM(rb_intern("memsize"));
244
+ sym_struct = ID2SYM(rb_intern("struct"));
245
+ sym_imemo_type = ID2SYM(rb_intern("imemo_type"));
246
+ sym_file = ID2SYM(rb_intern("file"));
247
+ sym_line = ID2SYM(rb_intern("line"));
248
+ sym_shared = ID2SYM(rb_intern("shared"));
249
+ sym_references = ID2SYM(rb_intern("references"));
250
+
251
+ VALUE rb_mHeapProfiler = rb_const_get(rb_cObject, rb_intern("HeapProfiler"));
252
+ VALUE rb_mHeapProfilerNative = rb_const_get(rb_mHeapProfiler, rb_intern("Native"));
253
+
254
+ rb_eHeapProfilerError = rb_const_get(rb_mHeapProfiler, rb_intern("Error"));
255
+ rb_global_variable(&rb_eHeapProfilerError);
256
+
257
+ rb_define_module_function(rb_mHeapProfilerNative, "_build_index", reinterpret_cast<VALUE (*)(...)>(rb_heap_build_index), 2);
258
+ rb_define_module_function(rb_mHeapProfilerNative, "parse_address", reinterpret_cast<VALUE (*)(...)>(rb_heap_parse_address), 1);
259
+ rb_define_module_function(rb_mHeapProfilerNative, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_heap_load_many), 2);
260
+ rb_define_module_function(rb_mHeapProfilerNative, "_filter_heap", reinterpret_cast<VALUE (*)(...)>(rb_heap_filter), 3);
261
+ }
262
+ }