heap-profiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+ require "bundler/gem_tasks"
3
+ require "rake/testtask"
4
+ require "rake/extensiontask"
5
+
6
+ Rake::ExtensionTask.new("heap_profiler") do |ext|
7
+ ext.ext_dir = 'ext/heap_profiler'
8
+ ext.lib_dir = "lib/heap_profiler"
9
+ end
10
+
11
+ Rake::TestTask.new(:test) do |t|
12
+ t.libs << "test"
13
+ t.libs << "lib"
14
+ t.test_files = FileList["test/**/*_test.rb"]
15
+ end
16
+
17
+ task default: %i(compile test)
data/TODO.md ADDED
@@ -0,0 +1,9 @@
1
+ ### Important
2
+
3
+ - Test with other Ruby versions.
4
+
5
+ ### Explore
6
+
7
+ - Performance optimization.
8
+ - Would filtering on the `generation` field be faster than the address extraction?
9
+ - Detect object growth?
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "benchmark/ips"
6
+ require "heap_profiler/full"
7
+
8
+ Benchmark.ips do |x|
9
+ x.report("ruby") { HeapProfiler::Native.ruby_parse_address("0x7f921e88a8f8") }
10
+ x.report("cpp") { HeapProfiler::Native.parse_address("0x7f921e88a8f8") }
11
+ x.compare!
12
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "benchmark/ips"
6
+ require "heap_profiler/full"
7
+
8
+ FIXTURE_PATH = File.expand_path("../../test/fixtures/diffed-heap/allocated.heap", __FILE__)
9
+
10
+ Benchmark.ips do |x|
11
+ x.report("ruby") { HeapProfiler::Native.ruby_build_index(FIXTURE_PATH) }
12
+ x.report("cpp") { HeapProfiler::Native.build_index(FIXTURE_PATH) }
13
+ x.compare!
14
+ end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "heap-profiler"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+ require 'bundler/setup'
4
+ require "heap-profiler"
5
+ require 'fileutils'
6
+ # Calls HeapProfiler.report on the directory given as the first argument; a second argument of --empty profiles an empty block.
7
+ class SomeCustomStuff
8
+ end
9
+ # Allocated before the report starts; the profiled block below sets this global to nil.
10
+ $freed_objects = [+"i am free", +"i am free too"]
11
+ # Create the target directory if needed, then delete the entries matched by "<dir>/*".
12
+ dir = ARGV.first
13
+ FileUtils.mkdir_p(dir)
14
+ FileUtils.rm_rf(Dir[File.join(dir, '*')])
15
+
16
+ if ARGV[1] == '--empty'
17
+ HeapProfiler.report(dir) {}
18
+ else
19
+ HeapProfiler.report(dir) do
20
+ $retained_objects = [ # stays reachable through the global after the block returns
21
+ +"I am retained",
22
+ +"I am retained",
23
+ +"I am retained too",
24
+ SomeCustomStuff.new,
25
+ ]
26
+ "I am a very very long string " * 4 # result is discarded
27
+ [ # array and its unfrozen strings are discarded
28
+ +"I am allocated",
29
+ +"I am allocated too",
30
+ ]
31
+ $freed_objects = nil # drops the only reference to the array created before the report
32
+ Struct.new("foo".to_sym, :bar)
33
+ end
34
+ end
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rubocop' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rubocop", "rubocop")
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,9 @@
1
+ #!/bin/bash
+ #
+ # Runs tests with Ruby warnings enabled and test/ on the load path.
+ #   no arguments       load every file matching ./test/**/*_test.rb
+ #   <file> [args...]   load only <file> (a leading "test/" is stripped);
+ #                      the remaining arguments are left in ARGV
+
+ if [[ $# -eq 0 ]]; then
+   exec ruby -I"test" -w -e 'Dir.glob("./test/**/*_test.rb").each { |f| require f }' -- "$@"
+ else
+   path=$1
+   shift
+   # Pass the path through ARGV instead of interpolating it into the -e
+   # source: a quote in the file name can no longer break or inject Ruby
+   # code, and the path itself is consumed before the tests see ARGV.
+   exec ruby -I"test" -w -e 'require ARGV.shift.sub(%r{\Atest/}, "")' -- "$path" "$@"
+ fi
9
+
data/dev.yml ADDED
@@ -0,0 +1,20 @@
1
+ name: heap-profiler
2
+
3
+ type: ruby
4
+
5
+ up:
6
+ - ruby: 2.7.1
7
+ - bundler
8
+
9
+ commands:
10
+ console:
11
+ desc: 'start a console'
12
+ run: bin/console
13
+ run:
14
+ desc: 'start the application'
15
+ run: bin/run
16
+ test:
17
+ syntax:
18
+ argument: file
19
+ optional: args...
20
+ run: bin/testunit
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require "heap_profiler/full"
+
+ # Run the command-line interface with the process arguments and use the
+ # value it returns as the exit status.
+ status = HeapProfiler::CLI.new(ARGV).run
+ exit(status)
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
+ $CXXFLAGS += ' -O3 -std=c++1z -Wno-register '
6
+
7
+ create_makefile 'heap_profiler/heap_profiler'
@@ -0,0 +1,262 @@
1
+ #include "ruby.h"
2
+ #include "simdjson.h"
3
+ #include <fstream>
4
+
5
+ using namespace simdjson;
6
+
7
+ // Ruby values cached for the lifetime of the extension. All of them are
+ // assigned in Init_heap_profiler: rb_eHeapProfilerError from the
+ // HeapProfiler::Error constant, and each sym_* from the Symbol of the same name.
+ static VALUE rb_eHeapProfilerError, sym_type, sym_class, sym_address, sym_value,
8
+ sym_memsize, sym_imemo_type, sym_struct, sym_file, sym_line, sym_shared,
9
+ sym_references;
10
+
11
+ // Single simdjson DOM parser instance used by every function in this file.
+ static dom::parser parser;
12
+
13
+ // Lookup table from a byte value to the hexadecimal digit it encodes:
+ // '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15.
+ // Every other byte maps to 0, so non-hex input is not detected here.
+ const uint64_t digittoval[256] = {
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8,
17
+ 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0,
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
+ 0, 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0, 0};
28
+
29
+ // Inspired by https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
+ //
+ // Ruby addresses in heap dumps are hexadecimal strings in the range
+ // "0x000000000000".."0xffffffffffff": a "0x" prefix followed by 12 digits.
+ // The format being fairly stable allows for faster parsing. For such input the
+ // result should be equivalent to String#to_i(16).
+ //
+ // address - pointer to at least 14 readable bytes; bytes 0 and 1 (the "0x"
+ //           prefix) are skipped, bytes 2..13 are read as hex digits.
+ //
+ // Returns the 48-bit value. Non-hex bytes contribute 0 (see digittoval).
+ static inline uint64_t parse_address(const char * address) {
+     // Index the table through unsigned char: plain char may be signed, in
+     // which case a byte >= 0x80 would produce a negative index and read
+     // outside the 256-entry table.
+     const unsigned char * digits = reinterpret_cast<const unsigned char *>(address);
+     return (
+         digittoval[digits[ 2]] << 44 |
+         digittoval[digits[ 3]] << 40 |
+         digittoval[digits[ 4]] << 36 |
+         digittoval[digits[ 5]] << 32 |
+         digittoval[digits[ 6]] << 28 |
+         digittoval[digits[ 7]] << 24 |
+         digittoval[digits[ 8]] << 20 |
+         digittoval[digits[ 9]] << 16 |
+         digittoval[digits[10]] << 12 |
+         digittoval[digits[11]] << 8 |
+         digittoval[digits[12]] << 4 |
+         digittoval[digits[13]]
+     );
+ }
48
+
49
+ // Parses the address stored in a JSON element.
+ //
+ // Returns 0 when the element is not a string (the ROOT object case), otherwise
+ // the value produced by parse_address(const char *). The return type matches
+ // that overload; parsed values never exceed 48 bits.
+ //
+ // Throws simdjson_error(INCORRECT_TYPE) when the string is not exactly 14
+ // characters long. This used to be an assert(), which disappears under NDEBUG
+ // (leaving an unchecked read past the string) and aborts the whole Ruby
+ // process otherwise.
+ static inline uint64_t parse_address(dom::element element) {
+     std::string_view address;
+     if (element.get(address)) {
+         return 0; // ROOT object
+     }
+     if (address.size() != 14) {
+         throw simdjson_error(INCORRECT_TYPE);
+     }
+     return parse_address(address.data());
+ }
57
+
58
+ // Native._build_index(path, batch_size)
+ //
+ // Reads the heap dump at `path` with parser.load_many and builds two Hashes
+ // keyed by parsed object address:
+ //   class_index  - address => "name" of every CLASS or MODULE entry that has one
+ //   string_index - address => "value" of every STRING entry that has one
+ // Entries without a string "type" are skipped.
+ //
+ // path       - Ruby String (must not contain NUL bytes).
+ // batch_size - Ruby Fixnum forwarded to load_many.
+ //
+ // Returns [class_index, string_index].
+ // Raises HeapProfiler::Error when simdjson reports or throws an error.
+ static VALUE rb_heap_build_index(VALUE self, VALUE path, VALUE batch_size) {
+     Check_Type(path, T_STRING);
+     Check_Type(batch_size, T_FIXNUM);
+     // RSTRING_PTR is not guaranteed to be NUL-terminated; StringValueCStr is,
+     // and it rejects strings with embedded NUL bytes.
+     const char *c_path = StringValueCStr(path);
+
+     VALUE string_index = rb_hash_new();
+     VALUE class_index = rb_hash_new();
+
+     // rb_raise longjmps, so it must not run while C++ objects (the document
+     // stream, an in-flight exception) are alive. Record the failure here and
+     // raise once the try/catch has been left.
+     simdjson::error_code failure = SUCCESS;
+     bool thrown = false;
+
+     try {
+         auto [objects, error] = parser.load_many(c_path, FIX2INT(batch_size));
+         if (error != SUCCESS) {
+             failure = error;
+         } else {
+             for (dom::object object : objects) {
+                 std::string_view type;
+                 if (object["type"].get(type)) {
+                     continue;
+                 }
+
+                 if (type == "STRING") {
+                     std::string_view value;
+                     if (!object["value"].get(value)) {
+                         VALUE address = INT2FIX(parse_address(object["address"]));
+                         VALUE string = rb_utf8_str_new(value.data(), value.size());
+                         rb_hash_aset(string_index, address, string);
+                     }
+                 } else if (type == "CLASS" || type == "MODULE") {
+                     std::string_view name;
+                     if (!object["name"].get(name)) {
+                         VALUE address = INT2FIX(parse_address(object["address"]));
+                         VALUE class_name = rb_utf8_str_new(name.data(), name.size());
+                         rb_hash_aset(class_index, address, class_name);
+                     }
+                 }
+             }
+         }
+     } catch (const simdjson::simdjson_error &exc) {
+         failure = exc.error();
+         thrown = true;
+     }
+
+     if (failure != SUCCESS) {
+         // error_message() returns the same text simdjson_error::what() does.
+         if (thrown) {
+             rb_raise(rb_eHeapProfilerError, "exc: %s", error_message(failure));
+         } else {
+             rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
+         }
+     }
+
+     VALUE return_value = rb_ary_new();
+     rb_ary_push(return_value, class_index);
+     rb_ary_push(return_value, string_index);
+     return return_value;
+ }
104
+
105
+ // Native.parse_address(address)
+ //
+ // address - Ruby String of exactly 14 bytes ("0x" followed by 12 hex digits).
+ //
+ // Returns the parsed address as an Integer.
+ // Raises TypeError (via Check_Type) when `address` is not a String and
+ // ArgumentError when it is not 14 bytes long. The length used to be guarded
+ // only by assert(), which either aborts the process or, under NDEBUG, lets
+ // parse_address read past the end of a short string.
+ static VALUE rb_heap_parse_address(VALUE self, VALUE address) {
+     Check_Type(address, T_STRING);
+     if (RSTRING_LEN(address) != 14) {
+         rb_raise(rb_eArgError, "expected a 14 byte address, got %ld bytes", RSTRING_LEN(address));
+     }
+     return INT2FIX(parse_address(RSTRING_PTR(address)));
+ }
110
+
111
+ // Converts one heap dump entry into a Ruby Hash with Symbol keys.
+ //
+ // Fields are copied only when present with the expected JSON type, in this
+ // order: :type, :address, :class, :memsize, then the type-specific fields
+ // (:imemo_type for IMEMO, :struct for DATA, :value / :shared / :references
+ // for STRING), and finally :file and :line. Addresses are stored as Integers.
+ static VALUE make_ruby_object(dom::object object)
+ {
+     VALUE result = rb_hash_new();
+
+     // Store `text` under `key` as a UTF-8 Ruby String.
+     const auto store_string = [&](VALUE key, std::string_view text) {
+         rb_hash_aset(result, key, rb_utf8_str_new(text.data(), text.size()));
+     };
+     // Store the address spelled by `text` under `key` as an Integer.
+     const auto store_address = [&](VALUE key, std::string_view text) {
+         rb_hash_aset(result, key, INT2FIX(parse_address(text.data())));
+     };
+
+     std::string_view type;
+     if (object["type"].get(type) == SUCCESS) {
+         store_string(sym_type, type);
+     }
+
+     std::string_view address;
+     if (object["address"].get(address) == SUCCESS) {
+         store_address(sym_address, address);
+     }
+
+     std::string_view klass;
+     if (object["class"].get(klass) == SUCCESS) {
+         store_address(sym_class, klass);
+     }
+
+     uint64_t memsize;
+     if (object["memsize"].get(memsize) == SUCCESS) {
+         rb_hash_aset(result, sym_memsize, INT2FIX(memsize));
+     }
+
+     if (type == "IMEMO") {
+         std::string_view imemo_type;
+         if (object["imemo_type"].get(imemo_type) == SUCCESS) {
+             store_string(sym_imemo_type, imemo_type);
+         }
+     } else if (type == "DATA") {
+         std::string_view struct_name;
+         if (object["struct"].get(struct_name) == SUCCESS) {
+             store_string(sym_struct, struct_name);
+         }
+     } else if (type == "STRING") {
+         std::string_view value;
+         if (object["value"].get(value) == SUCCESS) {
+             store_string(sym_value, value);
+         }
+
+         bool shared;
+         if (object["shared"].get(shared) == SUCCESS) {
+             rb_hash_aset(result, sym_shared, shared ? Qtrue : Qnil);
+             if (shared) {
+                 // Only shared strings get a :references array; non-string
+                 // reference entries are skipped.
+                 VALUE references = rb_ary_new();
+                 dom::array reference_elements(object["references"]);
+                 for (dom::element reference_element : reference_elements) {
+                     std::string_view reference;
+                     if (reference_element.get(reference) == SUCCESS) {
+                         rb_ary_push(references, INT2FIX(parse_address(reference.data())));
+                     }
+                 }
+                 rb_hash_aset(result, sym_references, references);
+             }
+         }
+     }
+
+     std::string_view file;
+     if (object["file"].get(file) == SUCCESS) {
+         store_string(sym_file, file);
+     }
+
+     uint64_t line;
+     if (object["line"].get(line) == SUCCESS) {
+         rb_hash_aset(result, sym_line, INT2FIX(line));
+     }
+
+     return result;
+ }
180
+
181
+ // Native._load_many(path, batch_size) { |object| ... }
+ //
+ // Reads the heap dump at `arg` with parser.load_many and yields one Hash
+ // (built by make_ruby_object) per document to the given block.
+ //
+ // arg        - Ruby String path (must not contain NUL bytes).
+ // batch_size - Ruby Fixnum forwarded to load_many.
+ //
+ // Returns nil.
+ // Raises HeapProfiler::Error when simdjson reports or throws an error; an
+ // exception or non-local exit from the block is propagated unchanged.
+ static VALUE rb_heap_load_many(VALUE self, VALUE arg, VALUE batch_size)
+ {
+     Check_Type(arg, T_STRING);
+     Check_Type(batch_size, T_FIXNUM);
+     // RSTRING_PTR is not guaranteed to be NUL-terminated; StringValueCStr is.
+     const char *path = StringValueCStr(arg);
+
+     // Ruby unwinds with longjmp, which would skip the destructors of the
+     // document stream and of a caught exception. Failures are therefore only
+     // recorded inside the try/catch and acted upon after it has been left.
+     simdjson::error_code failure = SUCCESS;
+     int jump_state = 0;
+
+     try
+     {
+         auto [docs, error] = parser.load_many(path, FIX2INT(batch_size));
+         if (error != SUCCESS)
+         {
+             failure = error;
+         }
+         else
+         {
+             for (dom::element doc : docs)
+             {
+                 // rb_protect traps a raise/break/throw coming out of the block.
+                 rb_protect(rb_yield, make_ruby_object(doc), &jump_state);
+                 if (jump_state)
+                 {
+                     break;
+                 }
+             }
+         }
+     }
+     catch (const simdjson::simdjson_error &exc)
+     {
+         failure = exc.error();
+     }
+
+     if (jump_state)
+     {
+         rb_jump_tag(jump_state); // resume the unwinding started inside the block
+     }
+     if (failure != SUCCESS)
+     {
+         rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
+     }
+     return Qnil;
+ }
206
+
207
+ // Native._filter_heap(source_path, destination_path, generation)
+ //
+ // Copies, line by line, the entries of the heap dump at `source_path` into
+ // `destination_path`, keeping only entries that
+ //   - have an integer "generation" greater than or equal to `generation`, and
+ //   - do not have "file" equal to "__hprof".
+ //
+ // source_path, destination_path - Ruby Strings (must not contain NUL bytes).
+ // _generation                   - Ruby Fixnum, minimum generation to keep.
+ //
+ // Returns the number of lines written.
+ // Raises HeapProfiler::Error when a line cannot be parsed. Previously the
+ // simdjson exception escaped into the Ruby VM uncaught.
+ static VALUE rb_heap_filter(VALUE self, VALUE source_path, VALUE destination_path, VALUE _generation)
+ {
+     Check_Type(source_path, T_STRING);
+     Check_Type(destination_path, T_STRING);
+     Check_Type(_generation, T_FIXNUM);
+     int64_t generation = FIX2INT(_generation);
+     // RSTRING_PTR is not guaranteed to be NUL-terminated; StringValueCStr is.
+     const char *source = StringValueCStr(source_path);
+     const char *destination = StringValueCStr(destination_path);
+
+     simdjson::error_code failure = SUCCESS;
+     int count = 0;
+
+     // Inner scope: both streams are closed and destroyed before rb_raise can
+     // longjmp out of this function.
+     {
+         std::ifstream input(source);
+         std::ofstream output(destination, std::ofstream::out);
+         try {
+             for (std::string line; getline(input, line);) {
+                 int64_t object_generation;
+                 dom::element object = parser.parse(line);
+                 if (object["generation"].get(object_generation) || object_generation < generation) {
+                     continue;
+                 }
+
+                 std::string_view file;
+                 if (!object["file"].get(file) && file == "__hprof") {
+                     continue;
+                 }
+
+                 count += 1;
+                 // '\n' instead of std::endl: same bytes, no flush per line.
+                 output << line << '\n';
+             }
+         } catch (const simdjson::simdjson_error &exc) {
+             failure = exc.error();
+         }
+         output.close();
+     }
+
+     if (failure != SUCCESS) {
+         rb_raise(rb_eHeapProfilerError, "%s", error_message(failure));
+     }
+     return INT2FIX(count);
+ }
236
+
237
+ extern "C" {
+     // Extension entry point. Caches the Symbols used as Hash keys, looks up
+     // the already-defined HeapProfiler, HeapProfiler::Native and
+     // HeapProfiler::Error constants, and registers the native module
+     // functions on HeapProfiler::Native.
+     void Init_heap_profiler(void) {
+         using ruby_method = VALUE (*)(...);
+         const auto symbol = [](const char *name) -> VALUE { return ID2SYM(rb_intern(name)); };
+
+         sym_type = symbol("type");
+         sym_class = symbol("class");
+         sym_address = symbol("address");
+         sym_value = symbol("value");
+         sym_memsize = symbol("memsize");
+         sym_struct = symbol("struct");
+         sym_imemo_type = symbol("imemo_type");
+         sym_file = symbol("file");
+         sym_line = symbol("line");
+         sym_shared = symbol("shared");
+         sym_references = symbol("references");
+
+         VALUE heap_profiler = rb_const_get(rb_cObject, rb_intern("HeapProfiler"));
+         VALUE native = rb_const_get(heap_profiler, rb_intern("Native"));
+
+         // Register the cached error class with the GC through rb_global_variable.
+         rb_eHeapProfilerError = rb_const_get(heap_profiler, rb_intern("Error"));
+         rb_global_variable(&rb_eHeapProfilerError);
+
+         rb_define_module_function(native, "_build_index", reinterpret_cast<ruby_method>(rb_heap_build_index), 2);
+         rb_define_module_function(native, "parse_address", reinterpret_cast<ruby_method>(rb_heap_parse_address), 1);
+         rb_define_module_function(native, "_load_many", reinterpret_cast<ruby_method>(rb_heap_load_many), 2);
+         rb_define_module_function(native, "_filter_heap", reinterpret_cast<ruby_method>(rb_heap_filter), 3);
+     }
+ }