allocation_sampler 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 268a796ca59b96cfcba16f0ee5778de979a9295adb7b7d32285d7749fabfb7e7
4
+ data.tar.gz: 76c4e4773f3ed799f461569db97a4541e873cdde5bad3ab1dbfe589c21991647
5
+ SHA512:
6
+ metadata.gz: 7d25b1f773144e4c60dc0eb2ac8f38f9623de0c6e5e7177ccd871c58edb0d018d7505c400f62423d2dce7c921dc49b5e9f98fb8c87e6448b4225fe82f9c04303
7
+ data.tar.gz: da097230b8c5d24f9c1d8c85e1cc9bd0e11a2743232ee1cc936ccde713c773fcbf85ce6ef0a4a48a170f522ffc5480b18b4bdd4532041ca9cb677139dc62683a
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2018-08-06
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,9 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ ext/allocation_sampler/allocation_sampler.c
6
+ ext/allocation_sampler/sort_r.h
7
+ ext/allocation_sampler/extconf.rb
8
+ lib/allocation_sampler.rb
9
+ test/test_allocation_sampler.rb
@@ -0,0 +1,45 @@
1
+ # allocation_sampler
2
+
3
+ * https://github.com/tenderlove/allocation_sampler
4
+
5
+ ## DESCRIPTION:
6
+
7
+ A sampling allocation profiler. This keeps track of allocations, but only on
8
+ specified intervals. Useful for profiling allocations in programs where there
9
+ is a time limit on completion of the program.
10
+
11
+ ## SYNOPSIS:
12
+
13
+ ```ruby
14
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
15
+ as.enable
16
+ 10.times { Object.new }
17
+ as.disable
18
+
19
+ as.result.allocations_with_top_frame # => {"Object"=>{"<compiled>"=>{1=>10}}}
20
+ ```
21
+
22
+ ## LICENSE:
23
+
24
+ (The MIT License)
25
+
26
+ Copyright (c) 2018 Aaron Patterson
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ 'Software'), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,26 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'rake/extensiontask'
6
+
7
+ Hoe.plugin :minitest
8
+ Hoe.plugin :gemspec # `gem install hoe-gemspec`
9
+ Hoe.plugin :git # `gem install hoe-git`
10
+
11
+ HOE = Hoe.spec 'allocation_sampler' do
12
+ developer('Aaron Patterson', 'aaron@tenderlovemaking.com')
13
+ self.readme_file = 'README.md'
14
+ self.history_file = 'CHANGELOG.md'
15
+ self.extra_rdoc_files = FileList['*.md']
16
+ self.license 'MIT'
17
+ self.spec_extras = {
18
+ :extensions => ["ext/allocation_sampler/extconf.rb"],
19
+ }
20
+ end
21
+
22
+ Rake::ExtensionTask.new("allocation_sampler", HOE.spec)
23
+
24
+ task :default => [:compile, :test]
25
+
26
+ # vim: syntax=ruby
@@ -0,0 +1,532 @@
1
+ #include <ruby/ruby.h>
2
+ #include <ruby/debug.h>
3
+ #include <stdlib.h>
4
+ #include "sort_r.h"
5
+
/*
 * Growable flat buffer holding either frame VALUEs or line numbers.
 * Records are appended back to back by newobj(); each record occupies
 * (stack depth + 3) slots.
 */
typedef struct {
    char frames;          /* 1 when `as.frames` is the active member, 0 for `as.lines` */
    size_t capa;          /* number of elements currently allocated */
    size_t next_free;     /* index of the first unused slot */
    size_t prev_free;     /* value of next_free before the most recent record was appended */
    size_t record_count;  /* number of records appended so far */
    union {
        VALUE *frames;
        int *lines;
    } as;
} sample_buffer_t;
17
+
/* Per-sampler state wrapped by the Ruby AllocationSampler object. */
typedef struct {
    size_t interval;                  /* a sample is taken when allocation_count % interval == 0 */
    size_t allocation_count;          /* incremented on every NEWOBJ event seen by newobj() */
    size_t overall_samples;           /* incremented once per recorded sample */
    sample_buffer_t * stack_samples;  /* frame records; NULL until the first sample */
    sample_buffer_t * lines_samples;  /* line-number records, laid out in parallel to stack_samples */
    VALUE newobj_hook;                /* tracepoint created in allocate() */
} trace_stats_t;
26
+
/* Context handed to compare() so it can resolve record offsets into both buffers. */
typedef struct {
    sample_buffer_t * frames;
    sample_buffer_t * lines;
} compare_data_t;
31
+
32
+ static void
33
+ free_sample_buffer(sample_buffer_t *buffer)
34
+ {
35
+ if (buffer->frames) {
36
+ xfree(buffer->as.lines);
37
+ } else {
38
+ xfree(buffer->as.frames);
39
+ }
40
+ xfree(buffer);
41
+ }
42
+
43
+ static sample_buffer_t *
44
+ alloc_lines_buffer(size_t size)
45
+ {
46
+ sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1);
47
+ samples->as.lines = xcalloc(sizeof(int), size);
48
+ samples->capa = size;
49
+ samples->frames = 0;
50
+ return samples;
51
+ }
52
+
53
+ static sample_buffer_t *
54
+ alloc_frames_buffer(size_t size)
55
+ {
56
+ sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1);
57
+ samples->as.frames = xcalloc(sizeof(VALUE), size);
58
+ samples->capa = size;
59
+ samples->frames = 1;
60
+ return samples;
61
+ }
62
+
63
+ static void
64
+ ensure_sample_buffer_capa(sample_buffer_t * buffer, size_t size)
65
+ {
66
+ /* If we can't fit all the samples in the buffer, double the buffer size. */
67
+ while (buffer->capa <= (buffer->next_free - 1) + (size + 2)) {
68
+ buffer->capa *= 2;
69
+ if (buffer->frames) {
70
+ buffer->as.frames = xrealloc(buffer->as.frames, sizeof(VALUE) * buffer->capa);
71
+ } else {
72
+ buffer->as.lines = xrealloc(buffer->as.lines, sizeof(int) * buffer->capa);
73
+ }
74
+ }
75
+ }
76
+
77
+ static void
78
+ dealloc(void *ptr)
79
+ {
80
+ trace_stats_t * stats = (trace_stats_t *)ptr;
81
+ sample_buffer_t * frames;
82
+ sample_buffer_t * lines;
83
+
84
+ frames = stats->stack_samples;
85
+ lines = stats->lines_samples;
86
+
87
+ if (frames && lines) {
88
+ free_sample_buffer(frames);
89
+ free_sample_buffer(lines);
90
+ }
91
+ xfree(stats);
92
+ }
93
+
94
+ static VALUE
95
+ make_frame_info(VALUE *frames, int *lines)
96
+ {
97
+ size_t count, i;
98
+ VALUE rb_frames;
99
+
100
+ count = *frames;
101
+ frames++;
102
+ lines++;
103
+
104
+ rb_frames = rb_ary_new_capa(count);
105
+
106
+ for(i = 0; i < count; i++, frames++, lines++) {
107
+ VALUE line = INT2NUM(*lines);
108
+ rb_ary_push(rb_frames, rb_ary_new3(2, rb_obj_id(*frames), line));
109
+ }
110
+
111
+ return rb_frames;
112
+ }
113
+
114
+ static int
115
+ compare(const void* l, const void* r, void* ctx)
116
+ {
117
+ compare_data_t *compare_data = (compare_data_t *)ctx;
118
+ sample_buffer_t *stacks = compare_data->frames;
119
+ sample_buffer_t *lines = compare_data->lines;
120
+
121
+ size_t left_offset = *(const size_t*)l;
122
+ size_t right_offset = *(const size_t*)r;
123
+
124
+ size_t lstack = *(stacks->as.frames + left_offset);
125
+ size_t rstack = *(stacks->as.frames + right_offset);
126
+
127
+ if (lstack == rstack) {
128
+ /* Compare the stack plus type info */
129
+ int stack_cmp = memcmp(stacks->as.frames + left_offset,
130
+ stacks->as.frames + right_offset,
131
+ (lstack + 3) * sizeof(VALUE *));
132
+
133
+ if (stack_cmp == 0) {
134
+ /* If the stacks are the same, check the line numbers */
135
+ int line_cmp = memcmp(lines->as.lines + left_offset + 1,
136
+ lines->as.lines + right_offset + 1,
137
+ lstack * sizeof(int));
138
+
139
+ return line_cmp;
140
+ } else {
141
+ return stack_cmp;
142
+ }
143
+ } else {
144
+ if (lstack < rstack) {
145
+ return -1;
146
+ } else {
147
+ return 1;
148
+ }
149
+ }
150
+ }
151
+
152
+ static void
153
+ mark(void * ptr)
154
+ {
155
+ trace_stats_t * stats = (trace_stats_t *)ptr;
156
+ sample_buffer_t * stacks;
157
+
158
+ stacks = stats->stack_samples;
159
+
160
+ if (stacks) {
161
+ VALUE * frame = stacks->as.frames;
162
+
163
+ while(frame < stacks->as.frames + stacks->next_free) {
164
+ size_t stack_size;
165
+ VALUE * head;
166
+
167
+ stack_size = *frame;
168
+ frame++; /* First element is the stack size */
169
+ head = frame;
170
+
171
+ for(; frame < (head + stack_size); frame++) {
172
+ rb_gc_mark(*frame);
173
+ }
174
+ frame++; /* Frame info */
175
+ rb_gc_mark(*frame);
176
+ frame++; /* Next Head */
177
+ }
178
+ }
179
+
180
+ if (stats->newobj_hook) {
181
+ rb_gc_mark(stats->newobj_hook);
182
+ }
183
+ }
184
+
/*
 * Typed-data descriptor used to wrap trace_stats_t in a Ruby object:
 * mark() is the mark callback, dealloc() the free callback, and no size
 * callback is provided.
 */
static const rb_data_type_t trace_stats_type = {
    "ObjectSpace/AllocationSampler",
    {mark, dealloc, 0,},
    0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
    RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};
193
+
194
+ static VALUE
195
+ user_class(VALUE klass, VALUE obj)
196
+ {
197
+ if (RTEST(klass) && !(RB_TYPE_P(obj, T_IMEMO) || RB_TYPE_P(obj, T_NODE)) && BUILTIN_TYPE(klass) == T_CLASS) {
198
+ return rb_class_path_cached(rb_class_real(klass));
199
+ } else {
200
+ return Qnil;
201
+ }
202
+ }
203
+
204
+ #define BUF_SIZE 2048
205
+
206
+ static void
207
+ newobj(VALUE tpval, void *ptr)
208
+ {
209
+ trace_stats_t * stats = (trace_stats_t *)ptr;
210
+
211
+ if (!(stats->allocation_count % stats->interval)) {
212
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
213
+ VALUE obj = rb_tracearg_object(tparg);
214
+ VALUE klass = RBASIC_CLASS(obj);
215
+ VALUE uc = user_class(klass, obj);
216
+
217
+ if (!NIL_P(uc)) {
218
+ VALUE frames_buffer[BUF_SIZE];
219
+ int lines_buffer[BUF_SIZE];
220
+
221
+ VALUE path = rb_tracearg_path(tparg);
222
+
223
+ if (RTEST(path)) {
224
+ sample_buffer_t * stack_samples;
225
+ sample_buffer_t * lines_samples;
226
+
227
+ int num = rb_profile_frames(0, sizeof(frames_buffer) / sizeof(VALUE), frames_buffer, lines_buffer);
228
+ if (!stats->stack_samples) {
229
+ stats->stack_samples = alloc_frames_buffer(num * 100);
230
+ stats->lines_samples = alloc_lines_buffer(num * 100);
231
+ }
232
+ stack_samples = stats->stack_samples;
233
+ lines_samples = stats->lines_samples;
234
+
235
+ ensure_sample_buffer_capa(stack_samples, num + 2);
236
+ ensure_sample_buffer_capa(lines_samples, num + 2);
237
+
238
+ stack_samples->prev_free = stack_samples->next_free;
239
+ lines_samples->prev_free = lines_samples->next_free;
240
+
241
+ stack_samples->as.frames[stack_samples->next_free] = (VALUE)num;
242
+ lines_samples->as.lines[lines_samples->next_free] = (VALUE)num;
243
+
244
+ memcpy(stack_samples->as.frames + stack_samples->next_free + 1, frames_buffer, num * sizeof(VALUE *));
245
+ memcpy(lines_samples->as.lines + lines_samples->next_free + 1, lines_buffer, num * sizeof(int));
246
+
247
+ /* We're not doing de-duping right now, so just set the stack count to 0xdeadbeef */
248
+ stack_samples->as.frames[stack_samples->next_free + num + 1] = 0xdeadbeef;
249
+ stack_samples->as.frames[stack_samples->next_free + num + 2] = uc;
250
+
251
+ lines_samples->as.lines[stack_samples->next_free + num + 1] = 0xdeadbeef;
252
+ lines_samples->as.lines[stack_samples->next_free + num + 2] = uc;
253
+
254
+ stack_samples->next_free += (num + 3);
255
+ lines_samples->next_free += (num + 3);
256
+
257
+ stack_samples->record_count++;
258
+ lines_samples->record_count++;
259
+
260
+ stats->overall_samples++;
261
+ }
262
+ }
263
+ }
264
+ stats->allocation_count++;
265
+ }
266
+
267
+ static VALUE
268
+ allocate(VALUE klass)
269
+ {
270
+ trace_stats_t * stats;
271
+ stats = xcalloc(sizeof(trace_stats_t), 1);
272
+ stats->interval = 1;
273
+ stats->newobj_hook = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj, stats);
274
+
275
+ return TypedData_Wrap_Struct(klass, &trace_stats_type, stats);
276
+ }
277
+
278
+ VALUE rb_cAllocationSampler;
279
+
280
+ static VALUE
281
+ enable(VALUE self)
282
+ {
283
+ trace_stats_t * stats;
284
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
285
+ rb_tracepoint_enable(stats->newobj_hook);
286
+ return Qnil;
287
+ }
288
+
289
+ static VALUE
290
+ disable(VALUE self)
291
+ {
292
+ trace_stats_t * stats;
293
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
294
+ rb_tracepoint_disable(stats->newobj_hook);
295
+ return Qnil;
296
+ }
297
+
298
+ static int
299
+ sort_frames(const void *left, const void *right)
300
+ {
301
+ const VALUE *vleft = (const VALUE *)left;
302
+ const VALUE *vright = (const VALUE *)right;
303
+ /* Sort so that 0 is always at the right */
304
+ if (*vleft == *vright) {
305
+ return 0;
306
+ } else {
307
+ if (*vleft == 0) {
308
+ return 1;
309
+ } else if (*vright == 0) {
310
+ return -1;
311
+ }
312
+ }
313
+ return *vleft - *vright;
314
+ }
315
+
316
+ static VALUE
317
+ frames(VALUE self)
318
+ {
319
+ trace_stats_t * stats;
320
+ sample_buffer_t * frame_buffer;
321
+ VALUE frames;
322
+ VALUE *samples;
323
+ VALUE *head;
324
+ VALUE rb_cFrame;
325
+
326
+ size_t buffer_size;
327
+
328
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
329
+
330
+ frame_buffer = stats->stack_samples;
331
+
332
+ if (!frame_buffer) {
333
+ return Qnil;
334
+ }
335
+
336
+ buffer_size = frame_buffer->next_free;
337
+
338
+ samples = xcalloc(sizeof(VALUE), buffer_size);
339
+ memcpy(samples, frame_buffer->as.frames, buffer_size * sizeof(VALUE));
340
+
341
+ /* Clear anything that's not a frame */
342
+ for(head = samples; head < (samples + buffer_size - 1); head++) {
343
+ size_t frame_count;
344
+ frame_count = *head;
345
+
346
+ *head = 0;
347
+ head++; /* Skip the count */
348
+ head += frame_count; /* Skip the stack */
349
+ *head = 0; /* Set the de-dup count to 0 */
350
+ head++;
351
+ *head = 0; /* Set the type to 0 */
352
+ }
353
+
354
+ qsort(samples, buffer_size, sizeof(VALUE *), sort_frames);
355
+
356
+ frames = rb_hash_new();
357
+
358
+ rb_cFrame = rb_const_get(rb_cAllocationSampler, rb_intern("Frame"));
359
+
360
+ for(head = samples; head < (samples + buffer_size); ) {
361
+ if (*head == 0)
362
+ break;
363
+
364
+ VALUE file;
365
+ VALUE frame;
366
+
367
+ file = rb_profile_frame_absolute_path(*(VALUE *)head);
368
+ if (NIL_P(file))
369
+ file = rb_profile_frame_path(*head);
370
+
371
+ VALUE args[3];
372
+
373
+ args[0] = rb_obj_id(*head);
374
+ args[1] = rb_profile_frame_full_label(*head);
375
+ args[2] = file;
376
+
377
+ frame = rb_class_new_instance(3, args, rb_cFrame);
378
+
379
+ rb_hash_aset(frames, rb_obj_id(*head), frame);
380
+
381
+ /* Skip duplicates */
382
+ VALUE *cmp;
383
+ for (cmp = head + 1; cmp < (samples + buffer_size); cmp++) {
384
+ if (*cmp != *head) {
385
+ break;
386
+ }
387
+ }
388
+ head = cmp;
389
+ }
390
+
391
+ xfree(samples);
392
+
393
+ return frames;
394
+ }
395
+
396
+ static VALUE
397
+ samples(VALUE self)
398
+ {
399
+ trace_stats_t * stats;
400
+ sample_buffer_t * frames;
401
+ sample_buffer_t * lines;
402
+ size_t *record_offsets;
403
+ VALUE result = Qnil;
404
+
405
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
406
+
407
+ frames = stats->stack_samples;
408
+ lines = stats->lines_samples;
409
+
410
+ if (frames && lines) {
411
+ size_t i, j;
412
+ size_t * head;
413
+ VALUE * frame = frames->as.frames;
414
+ compare_data_t compare_ctx;
415
+ compare_ctx.frames = frames;
416
+ compare_ctx.lines = lines;
417
+
418
+ record_offsets = xcalloc(sizeof(size_t), frames->record_count);
419
+ head = record_offsets;
420
+
421
+ i = 0;
422
+ while(frame < frames->as.frames + frames->next_free) {
423
+ *head = i; /* Store the frame start offset */
424
+ head++; /* Move to the next entry in record_offsets */
425
+ i += (*frame + 3); /* Increase the offset */
426
+ frame += (*frame + 3); /* Move to the next frame */
427
+ }
428
+
429
+ sort_r(record_offsets, frames->record_count, sizeof(size_t), compare, &compare_ctx);
430
+
431
+ VALUE unique_frames = rb_ary_new();
432
+
433
+ for(i = 0; i < frames->record_count; ) {
434
+ size_t current = record_offsets[i];
435
+ size_t count = 0;
436
+
437
+ /* Count any duplicate stacks ahead of us in the array */
438
+ for (j = i+1; j < frames->record_count; j++) {
439
+ size_t next = record_offsets[j];
440
+ int same = compare(&current, &next, &compare_ctx);
441
+
442
+ if (same == 0) {
443
+ count++;
444
+ } else {
445
+ break;
446
+ }
447
+ }
448
+
449
+ i = j;
450
+
451
+ size_t stack_size = *(frames->as.frames + current);
452
+
453
+ VALUE type = *(frames->as.frames + current + stack_size + 2);
454
+
455
+ rb_ary_push(unique_frames,
456
+ rb_ary_new3(3,
457
+ type,
458
+ INT2NUM(count + 1),
459
+ make_frame_info(frames->as.frames + current, lines->as.lines + current)));
460
+
461
+ }
462
+
463
+ xfree(record_offsets);
464
+
465
+ result = unique_frames;
466
+ }
467
+
468
+ return result;
469
+ }
470
+
471
+ static VALUE
472
+ initialize(int argc, VALUE *argv, VALUE self)
473
+ {
474
+ VALUE opts;
475
+ trace_stats_t * stats;
476
+
477
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
478
+ rb_scan_args(argc, argv, ":", &opts);
479
+ if (!NIL_P(opts)) {
480
+ ID ids[2];
481
+ VALUE args[2];
482
+ ids[0] = rb_intern("interval");
483
+ rb_get_kwargs(opts, ids, 0, 1, args);
484
+
485
+ if (args[0] != Qundef) {
486
+ stats->interval = NUM2INT(args[0]);
487
+ }
488
+ }
489
+
490
+ return self;
491
+ }
492
+
493
+ static VALUE
494
+ interval(VALUE self)
495
+ {
496
+ trace_stats_t * stats;
497
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
498
+ return INT2NUM(stats->interval);
499
+ }
500
+
501
+ static VALUE
502
+ allocation_count(VALUE self)
503
+ {
504
+ trace_stats_t * stats;
505
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
506
+ return INT2NUM(stats->allocation_count);
507
+ }
508
+
509
+ static VALUE
510
+ overall_samples(VALUE self)
511
+ {
512
+ trace_stats_t * stats;
513
+ TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
514
+ return INT2NUM(stats->overall_samples);
515
+ }
516
+
517
+ void
518
+ Init_allocation_sampler(void)
519
+ {
520
+ VALUE rb_mObjSpace = rb_const_get(rb_cObject, rb_intern("ObjectSpace"));
521
+
522
+ rb_cAllocationSampler = rb_define_class_under(rb_mObjSpace, "AllocationSampler", rb_cObject);
523
+ rb_define_alloc_func(rb_cAllocationSampler, allocate);
524
+ rb_define_method(rb_cAllocationSampler, "initialize", initialize, -1);
525
+ rb_define_method(rb_cAllocationSampler, "enable", enable, 0);
526
+ rb_define_method(rb_cAllocationSampler, "disable", disable, 0);
527
+ rb_define_method(rb_cAllocationSampler, "frames", frames, 0);
528
+ rb_define_method(rb_cAllocationSampler, "samples", samples, 0);
529
+ rb_define_method(rb_cAllocationSampler, "interval", interval, 0);
530
+ rb_define_method(rb_cAllocationSampler, "allocation_count", allocation_count, 0);
531
+ rb_define_method(rb_cAllocationSampler, "overall_samples", overall_samples, 0);
532
+ }
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ $CFLAGS << " -Wno-declaration-after-statement"
3
+ create_makefile('allocation_sampler')
@@ -0,0 +1,226 @@
1
+ /* Obtained from https://github.com/noporpoise/sort_r on 2018-09-19 */
2
+ /* Isaac Turner 29 April 2014 Public Domain */
3
+ #ifndef SORT_R_H_
4
+ #define SORT_R_H_
5
+
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ /*
10
+
11
+ sort_r function to be exported.
12
+
13
+ Parameters:
14
+ base is the array to be sorted
15
+ nel is the number of elements in the array
16
+ width is the size in bytes of each element of the array
17
+ compar is the comparison function
18
+ arg is a pointer to be passed to the comparison function
19
+
20
+ void sort_r(void *base, size_t nel, size_t width,
21
+ int (*compar)(const void *_a, const void *_b, void *_arg),
22
+ void *arg);
23
+
24
+ */
25
+
26
+ #define _SORT_R_INLINE inline
27
+
28
+ #if (defined __APPLE__ || defined __MACH__ || defined __DARWIN__ || \
29
+ defined __FreeBSD__ || defined __DragonFly__)
30
+ # define _SORT_R_BSD
31
+ #elif (defined _GNU_SOURCE || defined __gnu_hurd__ || defined __GNU__ || \
32
+ defined __linux__ || defined __MINGW32__ || defined __GLIBC__)
33
+ # define _SORT_R_LINUX
34
+ #elif (defined _WIN32 || defined _WIN64 || defined __WINDOWS__)
35
+ # define _SORT_R_WINDOWS
36
+ # undef _SORT_R_INLINE
37
+ # define _SORT_R_INLINE __inline
38
+ #else
39
+ /* Using our own recursive quicksort sort_r_simple() */
40
+ #endif
41
+
42
+ #if (defined NESTED_QSORT && NESTED_QSORT == 0)
43
+ # undef NESTED_QSORT
44
+ #endif
45
+
46
+ /* swap a, b iff a>b */
47
+ /* __restrict is same as restrict but better support on old machines */
48
+ static _SORT_R_INLINE int sort_r_cmpswap(char *__restrict a, char *__restrict b, size_t w,
49
+ int (*compar)(const void *_a, const void *_b,
50
+ void *_arg),
51
+ void *arg)
52
+ {
53
+ char tmp, *end = a+w;
54
+ if(compar(a, b, arg) > 0) {
55
+ for(; a < end; a++, b++) { tmp = *a; *a = *b; *b = tmp; }
56
+ return 1;
57
+ }
58
+ return 0;
59
+ }
60
+
/* Implement recursive quicksort ourselves */
/* Note: quicksort is not stable, equivalent values may be swapped */
/*
 * Sorts `nel` elements of `w` bytes each, starting at `base`, in place.
 * `compar` receives two element pointers plus `arg` and returns <0, 0 or >0.
 * Fewer than 7 elements are handled by insertion sort; larger inputs are
 * partitioned around a median-of-three pivot and both halves are sorted
 * recursively.
 */
static _SORT_R_INLINE void sort_r_simple(void *base, size_t nel, size_t w,
                                         int (*compar)(const void *_a, const void *_b,
                                                       void *_arg),
                                         void *arg)
{
  char *b = (char *)base, *end = b + nel*w;
  if(nel < 7) {
    /* Insertion sort for arbitrarily small inputs */
    char *pi, *pj;
    for(pi = b+w; pi < end; pi += w) {
      /* Bubble the element at pi leftwards until its left neighbour is not greater */
      for(pj = pi; pj > b && sort_r_cmpswap(pj-w,pj,w,compar,arg); pj -= w) {}
    }
  }
  else
  {
    /* nel > 6; Quicksort */

    /* Use median of first, middle and last items as pivot */
    char *x, *y, *xend, ch;
    char *pl, *pr;
    char *last = b+w*(nel-1), *tmp;
    char *l[3];
    l[0] = b;
    l[1] = b+w*(nel/2);
    l[2] = last;

    /* Order the three pointers by the values they point at; l[1] becomes the median */
    if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; }
    if(compar(l[1],l[2],arg) > 0) {
      tmp=l[1]; l[1]=l[2]; l[2]=tmp; /* swap(l[1],l[2]) */
      if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; }
    }

    /* swap l[id], l[2] to put pivot as last element */
    for(x = l[1], y = last, xend = x+w; x<xend; x++, y++) {
      ch = *x; *x = *y; *y = ch;
    }

    pl = b;
    pr = last;

    /*
     * Partition: the pivot alternates between sitting at pr and at pl.
     * Each successful cmpswap moves it to the other side and shrinks the
     * unpartitioned range [pl, pr] by one element.
     */
    while(pl < pr) {
      for(; pl < pr; pl += w) {
        if(sort_r_cmpswap(pl, pr, w, compar, arg)) {
          pr -= w; /* pivot now at pl */
          break;
        }
      }
      for(; pl < pr; pr -= w) {
        if(sort_r_cmpswap(pl, pr, w, compar, arg)) {
          pl += w; /* pivot now at pr */
          break;
        }
      }
    }

    /* Recurse on the elements before pl and on those after it */
    sort_r_simple(b, (pl-b)/w, w, compar, arg);
    sort_r_simple(pl+w, (end-(pl+w))/w, w, compar, arg);
  }
}
122
+
123
+
124
+ #if defined NESTED_QSORT
125
+
126
+ static _SORT_R_INLINE void sort_r(void *base, size_t nel, size_t width,
127
+ int (*compar)(const void *_a, const void *_b,
128
+ void *aarg),
129
+ void *arg)
130
+ {
131
+ int nested_cmp(const void *a, const void *b)
132
+ {
133
+ return compar(a, b, arg);
134
+ }
135
+
136
+ qsort(base, nel, width, nested_cmp);
137
+ }
138
+
139
+ #else /* !NESTED_QSORT */
140
+
141
+ /* Declare structs and functions */
142
+
143
+ #if defined _SORT_R_BSD
144
+
145
+ /* Ensure qsort_r is defined */
146
+ extern void qsort_r(void *base, size_t nel, size_t width, void *thunk,
147
+ int (*compar)(void *_thunk, const void *_a, const void *_b));
148
+
149
+ #endif
150
+
151
+ #if defined _SORT_R_BSD || defined _SORT_R_WINDOWS
152
+
153
+ /* BSD (qsort_r), Windows (qsort_s) require argument swap */
154
+
/*
 * Bundles the caller's comparator with its extra argument, so both can be
 * passed through the single context pointer that qsort_r/qsort_s provide.
 */
struct sort_r_data
{
  void *arg;
  int (*compar)(const void *_a, const void *_b, void *_arg);
};
160
+
161
+ static _SORT_R_INLINE int sort_r_arg_swap(void *s, const void *a, const void *b)
162
+ {
163
+ struct sort_r_data *ss = (struct sort_r_data*)s;
164
+ return (ss->compar)(a, b, ss->arg);
165
+ }
166
+
167
+ #endif
168
+
169
+ #if defined _SORT_R_LINUX
170
+
171
+ typedef int(* __compar_d_fn_t)(const void *, const void *, void *);
172
+ extern void qsort_r(void *base, size_t nel, size_t width,
173
+ __compar_d_fn_t __compar, void *arg)
174
+ __attribute__((nonnull (1, 4)));
175
+
176
+ #endif
177
+
178
+ /* implementation */
179
+
/*
 * Sort `nel` elements of `width` bytes at `base`, passing `arg` as the
 * third argument of every `compar` call. Dispatches at compile time to the
 * platform's qsort_r/qsort_s, or to sort_r_simple() when none is selected.
 */
static _SORT_R_INLINE void sort_r(void *base, size_t nel, size_t width,
                                  int (*compar)(const void *_a, const void *_b, void *_arg),
                                  void *arg)
{
  #if defined _SORT_R_LINUX

    #if defined __GLIBC__ && ((__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))

      /* no qsort_r in glibc before 2.8; fall back to our own sort_r_simple() */
      sort_r_simple(base, nel, width, compar, arg);

    #else

      /* This qsort_r takes the comparator before the context argument */
      qsort_r(base, nel, width, compar, arg);

    #endif

  #elif defined _SORT_R_BSD

    /* This qsort_r passes the context first, so go through sort_r_arg_swap */
    struct sort_r_data tmp;
    tmp.arg = arg;
    tmp.compar = compar;
    qsort_r(base, nel, width, &tmp, sort_r_arg_swap);

  #elif defined _SORT_R_WINDOWS

    /* qsort_s also passes the context first, so go through sort_r_arg_swap */
    struct sort_r_data tmp;
    tmp.arg = arg;
    tmp.compar = compar;
    qsort_s(base, nel, width, sort_r_arg_swap, &tmp);

  #else

    /* Fall back to our own quicksort implementation */
    sort_r_simple(base, nel, width, compar, arg);

  #endif
}
218
+
219
+ #endif /* !NESTED_QSORT */
220
+
221
+ #undef _SORT_R_INLINE
222
+ #undef _SORT_R_WINDOWS
223
+ #undef _SORT_R_LINUX
224
+ #undef _SORT_R_BSD
225
+
226
+ #endif /* SORT_R_H_ */
@@ -0,0 +1,210 @@
1
+ require 'allocation_sampler.so'
2
+ require 'delegate'
3
+ require 'set'
4
+ require 'cgi/escape'
5
+
6
+ module ObjectSpace
7
+ class AllocationSampler
8
+ VERSION = '1.0.0'
9
+
# Plain value object describing one frame: an id, a display name and the
# path of the file it came from. +children+ is exposed as a reader but is
# never assigned by this class.
class Frame
  attr_reader :id
  attr_reader :name
  attr_reader :path
  attr_reader :children

  def initialize(id, name, path)
    @id, @name, @path = id, name, path
  end
end
19
+
# Aggregated view over the raw sampler output.
#
# +samples+ is an array of [type, count, stack] triples, where +stack+ is
# an array of [frame_id, line] pairs with the allocating frame first.
# +frames+ maps frame ids to AllocationSampler::Frame objects.
class Result
  # Call-tree node: delegates id/name/path to the wrapped
  # AllocationSampler::Frame and adds sample counters and child links.
  class Frame < DelegateClass(AllocationSampler::Frame)
    attr_reader :line, :children
    attr_accessor :samples, :total_samples

    include Enumerable

    def initialize frame, line, samples
      super(frame)
      @line = line
      @samples = samples
      @total_samples = 0
      @children = Set.new
    end

    # Yields this node and every node reachable through #children, each
    # at most once.
    def each
      seen = {}
      stack = [self]

      while node = stack.pop
        next if seen[node]
        seen[node] = true
        yield node
        stack.concat node.children.to_a
      end
    end

    # Renders the tree rooted at this node as a Graphviz digraph. Edge
    # width grows with the child's share of this node's total_samples.
    def to_dot
      seen = {}
      "digraph allocations {\n" +
        " node[shape=record];\n" + print_edges(self, seen, total_samples) + "}\n"
    end

    private

    # Emits +node+ and its outgoing edges, visiting each node id once.
    def print_edges node, seen, total_samples
      return '' if seen[node.id]
      seen[node.id] = node
      " #{node.id} [label=\"#{CGI.escapeHTML node.name}\"];\n" +
        node.children.map { |child|
          ratio = child.total_samples / total_samples.to_f
          width = (1 * ratio) + 1
          " #{node.id} -> #{child.id} [penwidth=#{width}];\n" + print_edges(child, seen, total_samples)
        }.join
    end
  end

  attr_reader :samples, :frames

  # Sorts +samples+ in place, largest count first.
  def initialize samples, frames
    @samples = samples.sort_by! { |s| s[1] }.reverse!
    @frames = frames
  end

  # Returns { type => total allocation count }.
  def allocations_by_type
    @samples.each_with_object(Hash.new(0)) do |(type, count, _), h|
      h[type] += count
    end
  end

  # Returns { type => { path => { line => count } } } keyed by the top
  # (allocating) frame of each sample.
  #
  # Samples are unique per full stack, so several samples can share the
  # same top frame and line; their counts are summed. The previous version
  # assigned instead of adding, so only one of those counts survived.
  def allocations_with_top_frame
    @samples.each_with_object({}) do |(type, count, stack), h|
      top_frame_id, line = stack.first
      frame = @frames[top_frame_id]
      by_line = ((h[type] ||= {})[frame.path] ||= {})
      by_line[line] = by_line.fetch(line, 0) + count
    end
  end

  # Folds every sample into one shared set of tree nodes and returns the
  # root of the first sample's tree.
  def calltree
    frame_delegates = {}
    @samples.map { |type, count, stack|
      build_tree(stack, count, frame_delegates)
    }.uniq.first
  end

  # Like #calltree, but builds a separate tree per allocated type.
  # Returns { type => root node }.
  def by_type_with_call_tree
    types_with_stacks = @samples.group_by(&:first)
    types_with_stacks.transform_values do |stacks|
      frame_delegates = {}
      stacks.map { |_, count, stack|
        build_tree(stack, count, frame_delegates)
      }.uniq.first
    end
  end

  private

  # Adds one sample to the tree nodes cached in +frame_delegates+ and
  # returns the outermost caller's node. +count+ goes to the top frame's
  # own samples and to total_samples of every distinct frame in the stack.
  def build_tree stack, count, frame_delegates
    top_down = stack.reverse
    last_caller = nil
    seen = Set.new
    root = nil
    top_frame_id, top_line = stack.first
    top = frame_delegates[top_frame_id] ||= build_frame(top_frame_id, top_line, 0)
    top.samples += count
    top_down.each do |frame_id, line|
      frame = frame_delegates[frame_id] ||= build_frame(frame_id, line, 0)
      root ||= frame
      if last_caller
        last_caller.children << frame
      end
      last_caller = frame
      # A recursive frame appears several times in one stack; count it once.
      last_caller.total_samples += count unless seen.include?(frame_id)
      seen << frame_id
    end
    root
  end

  def build_frame id, line, samples
    Frame.new @frames[id], line, samples
  end
end
132
+
# Wraps the current samples and frames in a Result.
def result
  Result.new(samples, frames)
end
136
+
module Display
  # Prints a call tree as an indented ASCII tree, one frame per line, with
  # each frame's own sample count and its share of the root's total.
  # Everything is written to the IO-like object given as +output+.
  class Stack < DelegateClass(IO)
    attr_reader :max_depth

    def initialize output: $stdout
      super(output)
    end

    # Prints the tree rooted at +frames+ (a node responding to name,
    # samples, total_samples and children).
    def show frames
      width = max_width(frames, 0, {})
      display(frames, 0, frames.total_samples, [], {}, width)
    end

    private

    # Widest "indent + name" over the tree, at 4 columns per depth level.
    def max_width frame, depth, seen
      return 0 if seen.key? frame

      seen[frame] = true

      my_length = (depth * 4) + frame.name.length

      frame.children.each do |caller|
        child_len = max_width caller, depth + 1, seen
        my_length = child_len if my_length < child_len
      end

      my_length
    end

    def display frame, depth, total_samples, last_stack, seen, max_width
      seen[frame] = true

      # max_width and display walk children in different orders, so a frame
      # can be printed deeper than it was measured; never pad negatively.
      buffer = [max_width - ((depth * 4) + frame.name.length), 0].max

      self_samples = frame.samples

      # Every ancestor level contributes exactly 4 columns, matching the
      # depth * 4 used for the width computation.
      last_stack.each_with_index do |last, i|
        if i == last_stack.length - 1
          print(last ? "`-- " : "|-- ")
        else
          print(last ? "    " : "|   ")
        end
      end

      # Frame names may contain '%' (e.g. Integer#%), so they must not be
      # used as a printf format string.
      print frame.name
      print " " * buffer
      printf "% d % 8s", self_samples, "(%2.1f%%)" % (self_samples*100.0/total_samples)
      puts

      children = (frame.children || []).sort_by { |ie|
        -ie.total_samples
      }.reject { |f| seen[f] }

      children.each_with_index do |child, i|
        s = last_stack + [i == children.length - 1]
        display child, depth + 1, total_samples, s, seen, max_width
      end
    end
  end
end
209
+ end
210
+ end
@@ -0,0 +1,170 @@
1
+ require 'minitest/autorun'
2
+ require 'allocation_sampler'
3
+
4
# Exercises ObjectSpace::AllocationSampler: construction, per-line allocation
# counts, sampling intervals, and the call-tree displays.
class TestAllocationSampler < Minitest::Test
  def test_initialize
    assert ObjectSpace::AllocationSampler.new
  end

  def test_init_with_params
    as = ObjectSpace::AllocationSampler.new(interval: 10)
    assert_equal 10, as.interval
  end

  # Two allocations on two different lines are reported per line.
  def test_init_with_location
    iseq = RubyVM::InstructionSequence.new <<-eoruby
    Object.new
    Object.new
    eoruby
    as = ObjectSpace::AllocationSampler.new(interval: 1)
    as.enable
    iseq.eval
    as.disable

    assert_equal({"Object"=>{"<compiled>"=>{1=>1, 2=>1}}}, filter(as.result))
  end

  # Ten allocations from the same line are summed under that line.
  def test_location_same_line
    iseq = RubyVM::InstructionSequence.new <<-eoruby
    10.times { Object.new }
    eoruby
    as = ObjectSpace::AllocationSampler.new(interval: 1)
    as.enable
    iseq.eval
    as.disable

    assert_equal({"Object"=>{"<compiled>"=>{1=>10}}}, filter(as.result))
  end

  def test_location_mixed
    iseq = RubyVM::InstructionSequence.new <<-eoruby
    10.times { Object.new }
    Object.new
    eoruby
    as = ObjectSpace::AllocationSampler.new(interval: 1)
    as.enable
    iseq.eval
    as.disable

    assert_equal({"Object"=>{"<compiled>"=>{1=>10, 2=>1}}}, filter(as.result))
  end

  # Allocations inside a method body are attributed to the lines of that
  # body (lines 2 and 3 of the compiled snippet).
  def test_location_from_method
    iseq = RubyVM::InstructionSequence.new <<-eoruby
    def foo
      10.times { Object.new }
      Object.new
    end
    foo
    eoruby
    as = ObjectSpace::AllocationSampler.new(interval: 1)
    as.enable
    iseq.eval
    as.disable

    assert_equal({"Object"=>{"<compiled>"=>{2=>10, 3=>1}}}, filter(as.result))
  end

  # With interval 10, each line's 100 allocations show up as 10 samples,
  # while allocation_count reports 201.
  def test_location_larger_interval
    iseq = RubyVM::InstructionSequence.new <<-eom
    100.times { Object.new }
    100.times { Object.new }
    eom
    as = ObjectSpace::AllocationSampler.new(interval: 10)
    as.enable
    iseq.eval
    as.disable

    assert_equal({"Object"=>{"<compiled>"=>{1=>10, 2=>10}}}, filter(as.result))
    assert_equal 201, as.allocation_count
  end

  def test_interval_default
    as = ObjectSpace::AllocationSampler.new
    assert_equal 1, as.interval
  end

  def test_two_with_same_type
    as = ObjectSpace::AllocationSampler.new
    as.enable
    Object.new
    Object.new
    as.disable

    per_line_counts = filter(as.result)[Object.name].values.flat_map(&:values)
    assert_equal(2, per_line_counts.inject(:+))
  end

  def test_two_with_same_type_same_line
    as = ObjectSpace::AllocationSampler.new
    as.enable
    Object.new; Object.new
    Object.new; Object.new
    as.disable

    assert_equal 4, as.result.allocations_by_type[Object.name]
  end

  class X
  end

  # Allocates 1001 objects of two different classes while sampling and
  # checks the per-type totals.
  def test_expands
    as = ObjectSpace::AllocationSampler.new
    as.enable
    500.times do
      Object.new
      X.new
    end
    Object.new
    as.disable

    result = as.result
    assert_equal 501, result.allocations_by_type[Object.name]
    assert_equal 500, result.allocations_by_type[TestAllocationSampler::X.name]
  end

  # Call chain used by the call-tree tests: a -> b -> c -> d, five calls at
  # each level, so d allocates 125 objects.
  def d
    Object.new
  end
  def c; 5.times { d }; end
  def b; 5.times { c }; end
  def a; 5.times { b }; end

  def test_stack_trace
    as = ObjectSpace::AllocationSampler.new
    buffer = StringIO.new
    stack_printer = ObjectSpace::AllocationSampler::Display::Stack.new(
      output: buffer
    )
    as.enable
    a
    as.disable

    as.result.by_type_with_call_tree.each do |class_name, tree|
      assert_equal Object.name, class_name
      root = tree.find { |node| node.name.include? __method__.to_s }
      stack_printer.show root
    end

    # The heredoc body starts in column 0 on purpose: <<- keeps leading
    # whitespace, and the tree indentation is part of the expected text.
    # Each level is indented by four columns and every label is padded to
    # the widest one before the "% d % 8s" sample columns.
    assert_equal <<-eoout, buffer.string
TestAllocationSampler#test_stack_trace  0   (0.0%)
`-- TestAllocationSampler#a             0   (0.0%)
    `-- TestAllocationSampler#b         0   (0.0%)
        `-- TestAllocationSampler#c     0   (0.0%)
            `-- TestAllocationSampler#d 125 (100.0%)
    eoout
  end

  # Renders the call tree as dot. The result is asserted on instead of being
  # written to 'out.dot' in the working directory, so a test run leaves no
  # stray file behind and an empty rendering fails the test.
  def test_dot
    as = ObjectSpace::AllocationSampler.new
    as.enable
    a
    as.disable

    dot = as.result.calltree.to_dot
    refute_empty dot.to_s
  end

  private

  # Reduces a result to the per-type / per-file / per-line counts that the
  # location tests compare against.
  def filter result
    result.allocations_with_top_frame
  end
end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: allocation_sampler
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Aaron Patterson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-01-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ - - "<"
35
+ - !ruby/object:Gem::Version
36
+ version: '7'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '4.0'
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '7'
47
+ - !ruby/object:Gem::Dependency
48
+ name: hoe
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.17'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.17'
61
+ description: |-
62
+ A sampling allocation profiler. This keeps track of allocations, but only on
63
+ specified intervals. Useful for profiling allocations in programs where there
64
+ is a time limit on completion of the program.
65
+ email:
66
+ - aaron@tenderlovemaking.com
67
+ executables: []
68
+ extensions:
69
+ - ext/allocation_sampler/extconf.rb
70
+ extra_rdoc_files:
71
+ - CHANGELOG.md
72
+ - Manifest.txt
73
+ - README.md
74
+ files:
75
+ - CHANGELOG.md
76
+ - Manifest.txt
77
+ - README.md
78
+ - Rakefile
79
+ - ext/allocation_sampler/allocation_sampler.c
80
+ - ext/allocation_sampler/extconf.rb
81
+ - ext/allocation_sampler/sort_r.h
82
+ - lib/allocation_sampler.rb
83
+ - test/test_allocation_sampler.rb
84
+ homepage: https://github.com/tenderlove/allocation_sampler
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options:
90
+ - "--main"
91
+ - README.md
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubygems_version: 3.0.2
106
+ signing_key:
107
+ specification_version: 4
108
+ summary: A sampling allocation profiler
109
+ test_files: []