allocation_sampler 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 268a796ca59b96cfcba16f0ee5778de979a9295adb7b7d32285d7749fabfb7e7
4
+ data.tar.gz: 76c4e4773f3ed799f461569db97a4541e873cdde5bad3ab1dbfe589c21991647
5
+ SHA512:
6
+ metadata.gz: 7d25b1f773144e4c60dc0eb2ac8f38f9623de0c6e5e7177ccd871c58edb0d018d7505c400f62423d2dce7c921dc49b5e9f98fb8c87e6448b4225fe82f9c04303
7
+ data.tar.gz: da097230b8c5d24f9c1d8c85e1cc9bd0e11a2743232ee1cc936ccde713c773fcbf85ce6ef0a4a48a170f522ffc5480b18b4bdd4532041ca9cb677139dc62683a
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2018-08-06
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,9 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ ext/allocation_sampler/allocation_sampler.c
6
+ ext/allocation_sampler/sort_r.h
7
+ ext/allocation_sampler/extconf.rb
8
+ lib/allocation_sampler.rb
9
+ test/test_allocation_sampler.rb
@@ -0,0 +1,45 @@
1
+ # allocation_sampler
2
+
3
+ * https://github.com/tenderlove/allocation_sampler
4
+
5
+ ## DESCRIPTION:
6
+
7
+ A sampling allocation profiler. This keeps track of allocations, but only on
8
+ specified intervals. Useful for profiling allocations in programs where there
9
+ is a time limit on completion of the program.
10
+
11
+ ## SYNOPSIS:
12
+
13
+ ```ruby
14
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
15
+ as.enable
16
+ 10.times { Object.new }
17
+ as.disable
18
+
19
+ as.result # => {"Object"=>{"<compiled>"=>{1=>10}}}
20
+ ```
21
+
22
+ ## LICENSE:
23
+
24
+ (The MIT License)
25
+
26
+ Copyright (c) 2018 Aaron Patterson
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ 'Software'), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,26 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'rake/extensiontask'
6
+
7
+ Hoe.plugin :minitest
8
+ Hoe.plugin :gemspec # `gem install hoe-gemspec`
9
+ Hoe.plugin :git # `gem install hoe-git`
10
+
11
+ HOE = Hoe.spec 'allocation_sampler' do
12
+ developer('Aaron Patterson', 'aaron@tenderlovemaking.com')
13
+ self.readme_file = 'README.md'
14
+ self.history_file = 'CHANGELOG.md'
15
+ self.extra_rdoc_files = FileList['*.md']
16
+ self.license 'MIT'
17
+ self.spec_extras = {
18
+ :extensions => ["ext/allocation_sampler/extconf.rb"],
19
+ }
20
+ end
21
+
22
+ Rake::ExtensionTask.new("allocation_sampler", HOE.spec)
23
+
24
+ task :default => [:compile, :test]
25
+
26
+ # vim: syntax=ruby
@@ -0,0 +1,532 @@
1
+ #include <ruby/ruby.h>
2
+ #include <ruby/debug.h>
3
+ #include <stdlib.h>
4
+ #include "sort_r.h"
5
+
6
/* A growable buffer of allocation-sample records.  Each record is laid
 * out as [depth][entries x depth][dedup marker][class], so one record
 * occupies depth + 3 elements. */
typedef struct {
    char frames;         /* union discriminator: non-zero => as.frames (VALUE), 0 => as.lines (int) */
    size_t capa;         /* element capacity of the backing array */
    size_t next_free;    /* index of the next unused element */
    size_t prev_free;    /* start index of the most recently written record */
    size_t record_count; /* number of complete records stored */
    union {
        VALUE *frames;   /* raw frame handles from rb_profile_frames */
        int *lines;      /* line numbers parallel to the frames buffer */
    } as;
} sample_buffer_t;

/* Per-sampler state wrapped in the TypedData object. */
typedef struct {
    size_t interval;             /* sample every Nth allocation */
    size_t allocation_count;     /* total NEWOBJ events observed */
    size_t overall_samples;      /* number of records actually captured */
    sample_buffer_t * stack_samples; /* frame records; allocated lazily */
    sample_buffer_t * lines_samples; /* parallel line-number records */
    VALUE newobj_hook;           /* the NEWOBJ tracepoint object */
} trace_stats_t;

/* Context threaded through the sort_r comparator. */
typedef struct {
    sample_buffer_t * frames;
    sample_buffer_t * lines;
} compare_data_t;
31
+
32
+ static void
33
+ free_sample_buffer(sample_buffer_t *buffer)
34
+ {
35
+ if (buffer->frames) {
36
+ xfree(buffer->as.lines);
37
+ } else {
38
+ xfree(buffer->as.frames);
39
+ }
40
+ xfree(buffer);
41
+ }
42
+
43
+ static sample_buffer_t *
44
+ alloc_lines_buffer(size_t size)
45
+ {
46
+ sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1);
47
+ samples->as.lines = xcalloc(sizeof(int), size);
48
+ samples->capa = size;
49
+ samples->frames = 0;
50
+ return samples;
51
+ }
52
+
53
+ static sample_buffer_t *
54
+ alloc_frames_buffer(size_t size)
55
+ {
56
+ sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1);
57
+ samples->as.frames = xcalloc(sizeof(VALUE), size);
58
+ samples->capa = size;
59
+ samples->frames = 1;
60
+ return samples;
61
+ }
62
+
63
+ static void
64
+ ensure_sample_buffer_capa(sample_buffer_t * buffer, size_t size)
65
+ {
66
+ /* If we can't fit all the samples in the buffer, double the buffer size. */
67
+ while (buffer->capa <= (buffer->next_free - 1) + (size + 2)) {
68
+ buffer->capa *= 2;
69
+ if (buffer->frames) {
70
+ buffer->as.frames = xrealloc(buffer->as.frames, sizeof(VALUE) * buffer->capa);
71
+ } else {
72
+ buffer->as.lines = xrealloc(buffer->as.lines, sizeof(int) * buffer->capa);
73
+ }
74
+ }
75
+ }
76
+
77
+ static void
78
+ dealloc(void *ptr)
79
+ {
80
+ trace_stats_t * stats = (trace_stats_t *)ptr;
81
+ sample_buffer_t * frames;
82
+ sample_buffer_t * lines;
83
+
84
+ frames = stats->stack_samples;
85
+ lines = stats->lines_samples;
86
+
87
+ if (frames && lines) {
88
+ free_sample_buffer(frames);
89
+ free_sample_buffer(lines);
90
+ }
91
+ xfree(stats);
92
+ }
93
+
94
+ static VALUE
95
+ make_frame_info(VALUE *frames, int *lines)
96
+ {
97
+ size_t count, i;
98
+ VALUE rb_frames;
99
+
100
+ count = *frames;
101
+ frames++;
102
+ lines++;
103
+
104
+ rb_frames = rb_ary_new_capa(count);
105
+
106
+ for(i = 0; i < count; i++, frames++, lines++) {
107
+ VALUE line = INT2NUM(*lines);
108
+ rb_ary_push(rb_frames, rb_ary_new3(2, rb_obj_id(*frames), line));
109
+ }
110
+
111
+ return rb_frames;
112
+ }
113
+
114
+ static int
115
+ compare(const void* l, const void* r, void* ctx)
116
+ {
117
+ compare_data_t *compare_data = (compare_data_t *)ctx;
118
+ sample_buffer_t *stacks = compare_data->frames;
119
+ sample_buffer_t *lines = compare_data->lines;
120
+
121
+ size_t left_offset = *(const size_t*)l;
122
+ size_t right_offset = *(const size_t*)r;
123
+
124
+ size_t lstack = *(stacks->as.frames + left_offset);
125
+ size_t rstack = *(stacks->as.frames + right_offset);
126
+
127
+ if (lstack == rstack) {
128
+ /* Compare the stack plus type info */
129
+ int stack_cmp = memcmp(stacks->as.frames + left_offset,
130
+ stacks->as.frames + right_offset,
131
+ (lstack + 3) * sizeof(VALUE *));
132
+
133
+ if (stack_cmp == 0) {
134
+ /* If the stacks are the same, check the line numbers */
135
+ int line_cmp = memcmp(lines->as.lines + left_offset + 1,
136
+ lines->as.lines + right_offset + 1,
137
+ lstack * sizeof(int));
138
+
139
+ return line_cmp;
140
+ } else {
141
+ return stack_cmp;
142
+ }
143
+ } else {
144
+ if (lstack < rstack) {
145
+ return -1;
146
+ } else {
147
+ return 1;
148
+ }
149
+ }
150
+ }
151
+
152
/*
 * GC mark callback: keeps every captured frame VALUE and each record's
 * trailing class VALUE alive, plus the tracepoint object.
 *
 * Record layout (see newobj):
 *   [depth][frame VALUEs x depth][dedup marker][class VALUE]
 */
static void
mark(void * ptr)
{
    trace_stats_t * stats = (trace_stats_t *)ptr;
    sample_buffer_t * stacks;

    stacks = stats->stack_samples;

    if (stacks) {
        VALUE * frame = stacks->as.frames;

        /* Walk the buffer record by record. */
        while(frame < stacks->as.frames + stacks->next_free) {
            size_t stack_size;
            VALUE * head;

            stack_size = *frame;
            frame++; /* First element is the stack size */
            head = frame;

            /* Mark each captured frame handle. */
            for(; frame < (head + stack_size); frame++) {
                rb_gc_mark(*frame);
            }
            /* `frame` now sits on the dedup marker; step over it onto
             * the class slot and mark that too. */
            frame++; /* Frame info */
            rb_gc_mark(*frame);
            frame++; /* Next Head */
        }
    }

    /* Keep the tracepoint alive for the sampler's lifetime. */
    if (stats->newobj_hook) {
        rb_gc_mark(stats->newobj_hook);
    }
}
184
+
185
/* TypedData bindings for trace_stats_t: GC mark/free callbacks.
 * RUBY_TYPED_FREE_IMMEDIATELY (when this Ruby provides it) lets the VM
 * free the struct directly during sweep instead of deferring. */
static const rb_data_type_t trace_stats_type = {
    "ObjectSpace/AllocationSampler",
    {mark, dealloc, 0,},
    0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
    RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};
193
+
194
+ static VALUE
195
+ user_class(VALUE klass, VALUE obj)
196
+ {
197
+ if (RTEST(klass) && !(RB_TYPE_P(obj, T_IMEMO) || RB_TYPE_P(obj, T_NODE)) && BUILTIN_TYPE(klass) == T_CLASS) {
198
+ return rb_class_path_cached(rb_class_real(klass));
199
+ } else {
200
+ return Qnil;
201
+ }
202
+ }
203
+
204
+ #define BUF_SIZE 2048
205
+
206
/*
 * RUBY_INTERNAL_EVENT_NEWOBJ hook.  Every `interval`-th allocation of a
 * user-visible object captures the current Ruby stack (frames + lines)
 * into the two parallel sample buffers, along with the allocated class.
 *
 * Record appended to each buffer (num = stack depth):
 *   [num][entries x num][0xdeadbeef dedup marker][class]
 */
static void
newobj(VALUE tpval, void *ptr)
{
    trace_stats_t * stats = (trace_stats_t *)ptr;

    if (!(stats->allocation_count % stats->interval)) {
        rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
        VALUE obj = rb_tracearg_object(tparg);
        VALUE klass = RBASIC_CLASS(obj);
        VALUE uc = user_class(klass, obj);

        if (!NIL_P(uc)) {
            VALUE frames_buffer[BUF_SIZE];
            int lines_buffer[BUF_SIZE];

            VALUE path = rb_tracearg_path(tparg);

            if (RTEST(path)) {
                sample_buffer_t * stack_samples;
                sample_buffer_t * lines_samples;

                int num = rb_profile_frames(0, sizeof(frames_buffer) / sizeof(VALUE), frames_buffer, lines_buffer);
                /* Both buffers are allocated lazily, and always together. */
                if (!stats->stack_samples) {
                    stats->stack_samples = alloc_frames_buffer(num * 100);
                    stats->lines_samples = alloc_lines_buffer(num * 100);
                }
                stack_samples = stats->stack_samples;
                lines_samples = stats->lines_samples;

                ensure_sample_buffer_capa(stack_samples, num + 2);
                ensure_sample_buffer_capa(lines_samples, num + 2);

                stack_samples->prev_free = stack_samples->next_free;
                lines_samples->prev_free = lines_samples->next_free;

                /* First slot of the record is the stack depth.
                 * NOTE(review): the (VALUE)num cast on the int-array
                 * store is truncated right back to int — misleading but
                 * harmless. */
                stack_samples->as.frames[stack_samples->next_free] = (VALUE)num;
                lines_samples->as.lines[lines_samples->next_free] = (VALUE)num;

                /* NOTE(review): sizeof(VALUE *) should read
                 * sizeof(VALUE); the buffer holds VALUEs.  Same width on
                 * mainstream ABIs, so no behavior change in practice. */
                memcpy(stack_samples->as.frames + stack_samples->next_free + 1, frames_buffer, num * sizeof(VALUE *));
                memcpy(lines_samples->as.lines + lines_samples->next_free + 1, lines_buffer, num * sizeof(int));

                /* We're not doing de-duping right now, so just set the stack count to 0xdeadbeef */
                stack_samples->as.frames[stack_samples->next_free + num + 1] = 0xdeadbeef;
                stack_samples->as.frames[stack_samples->next_free + num + 2] = uc;

                /* NOTE(review): these two index the lines buffer with
                 * stack_samples->next_free; correct only because both
                 * buffers advance in lockstep.  Also `uc` (a VALUE) is
                 * truncated into an int slot — this class copy is never
                 * read back (samples() takes the class from the frames
                 * buffer). */
                lines_samples->as.lines[stack_samples->next_free + num + 1] = 0xdeadbeef;
                lines_samples->as.lines[stack_samples->next_free + num + 2] = uc;

                stack_samples->next_free += (num + 3);
                lines_samples->next_free += (num + 3);

                stack_samples->record_count++;
                lines_samples->record_count++;

                stats->overall_samples++;
            }
        }
    }
    stats->allocation_count++;
}
266
+
267
+ static VALUE
268
+ allocate(VALUE klass)
269
+ {
270
+ trace_stats_t * stats;
271
+ stats = xcalloc(sizeof(trace_stats_t), 1);
272
+ stats->interval = 1;
273
+ stats->newobj_hook = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj, stats);
274
+
275
+ return TypedData_Wrap_Struct(klass, &trace_stats_type, stats);
276
+ }
277
+
278
+ VALUE rb_cAllocationSampler;
279
+
280
/* ObjectSpace::AllocationSampler#enable — start sampling by arming the
 * internal NEWOBJ tracepoint.  Returns nil. */
static VALUE
enable(VALUE self)
{
    trace_stats_t * stats;
    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    rb_tracepoint_enable(stats->newobj_hook);
    return Qnil;
}
288
+
289
/* ObjectSpace::AllocationSampler#disable — stop sampling by disarming
 * the internal NEWOBJ tracepoint.  Returns nil. */
static VALUE
disable(VALUE self)
{
    trace_stats_t * stats;
    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    rb_tracepoint_disable(stats->newobj_hook);
    return Qnil;
}
297
+
298
+ static int
299
+ sort_frames(const void *left, const void *right)
300
+ {
301
+ const VALUE *vleft = (const VALUE *)left;
302
+ const VALUE *vright = (const VALUE *)right;
303
+ /* Sort so that 0 is always at the right */
304
+ if (*vleft == *vright) {
305
+ return 0;
306
+ } else {
307
+ if (*vleft == 0) {
308
+ return 1;
309
+ } else if (*vright == 0) {
310
+ return -1;
311
+ }
312
+ }
313
+ return *vleft - *vright;
314
+ }
315
+
316
/*
 * ObjectSpace::AllocationSampler#frames
 *
 * Build a Hash mapping frame object ids to AllocationSampler::Frame
 * instances for every unique frame seen in the samples.  Returns nil
 * when nothing has been recorded yet.
 */
static VALUE
frames(VALUE self)
{
    trace_stats_t * stats;
    sample_buffer_t * frame_buffer;
    VALUE frames;
    VALUE *samples;
    VALUE *head;
    VALUE rb_cFrame;

    size_t buffer_size;

    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);

    frame_buffer = stats->stack_samples;

    if (!frame_buffer) {
        return Qnil;
    }

    buffer_size = frame_buffer->next_free;

    /* Work on a scratch copy so the live sample records stay intact. */
    samples = xcalloc(sizeof(VALUE), buffer_size);
    memcpy(samples, frame_buffer->as.frames, buffer_size * sizeof(VALUE));

    /* Clear anything that's not a frame: zero each record's depth slot
     * and its two trailing bookkeeping slots, leaving only raw frame
     * VALUEs (and zeros) in the copy. */
    for(head = samples; head < (samples + buffer_size - 1); head++) {
        size_t frame_count;
        frame_count = *head;

        *head = 0;
        head++; /* Skip the count */
        head += frame_count; /* Skip the stack */
        *head = 0; /* Set the de-dup count to 0 */
        head++;
        *head = 0; /* Set the type to 0 */
    }

    /* Group duplicates together and sink the zeros to the end. */
    qsort(samples, buffer_size, sizeof(VALUE *), sort_frames);

    frames = rb_hash_new();

    rb_cFrame = rb_const_get(rb_cAllocationSampler, rb_intern("Frame"));

    for(head = samples; head < (samples + buffer_size); ) {
        if (*head == 0)
            break; /* zeroed tail reached: no more frames */

        VALUE file;
        VALUE frame;

        /* Prefer the absolute path; fall back to the iseq path. */
        file = rb_profile_frame_absolute_path(*(VALUE *)head);
        if (NIL_P(file))
            file = rb_profile_frame_path(*head);

        VALUE args[3];

        args[0] = rb_obj_id(*head);
        args[1] = rb_profile_frame_full_label(*head);
        args[2] = file;

        frame = rb_class_new_instance(3, args, rb_cFrame);

        rb_hash_aset(frames, rb_obj_id(*head), frame);

        /* Skip duplicates */
        VALUE *cmp;
        for (cmp = head + 1; cmp < (samples + buffer_size); cmp++) {
            if (*cmp != *head) {
                break;
            }
        }
        head = cmp;
    }

    xfree(samples);

    return frames;
}
395
+
396
/*
 * ObjectSpace::AllocationSampler#samples
 *
 * Collapse identical records into an Array of
 *   [class_name, count, [[frame_id, line], ...]]
 * triples.  Returns nil when nothing has been sampled.
 */
static VALUE
samples(VALUE self)
{
    trace_stats_t * stats;
    sample_buffer_t * frames;
    sample_buffer_t * lines;
    size_t *record_offsets;
    VALUE result = Qnil;

    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);

    frames = stats->stack_samples;
    lines = stats->lines_samples;

    if (frames && lines) {
        size_t i, j;
        size_t * head;
        VALUE * frame = frames->as.frames;
        compare_data_t compare_ctx;
        compare_ctx.frames = frames;
        compare_ctx.lines = lines;

        /* One start offset per record, so the comparator can address
         * whole records without copying them. */
        record_offsets = xcalloc(sizeof(size_t), frames->record_count);
        head = record_offsets;

        i = 0;
        while(frame < frames->as.frames + frames->next_free) {
            *head = i; /* Store the frame start offset */
            head++; /* Move to the next entry in record_offsets */
            i += (*frame + 3); /* Increase the offset */
            frame += (*frame + 3); /* Move to the next frame */
        }

        /* Sort the offsets so identical records become adjacent. */
        sort_r(record_offsets, frames->record_count, sizeof(size_t), compare, &compare_ctx);

        VALUE unique_frames = rb_ary_new();

        for(i = 0; i < frames->record_count; ) {
            size_t current = record_offsets[i];
            size_t count = 0;

            /* Count any duplicate stacks ahead of us in the array */
            for (j = i+1; j < frames->record_count; j++) {
                size_t next = record_offsets[j];
                int same = compare(&current, &next, &compare_ctx);

                if (same == 0) {
                    count++;
                } else {
                    break;
                }
            }

            i = j;

            size_t stack_size = *(frames->as.frames + current);

            /* Record layout: [depth][frames...][dedup][class]; the
             * class VALUE sits two slots past the last frame. */
            VALUE type = *(frames->as.frames + current + stack_size + 2);

            rb_ary_push(unique_frames,
                    rb_ary_new3(3,
                        type,
                        INT2NUM(count + 1),
                        make_frame_info(frames->as.frames + current, lines->as.lines + current)));

        }

        xfree(record_offsets);

        result = unique_frames;
    }

    return result;
}
470
+
471
/*
 * ObjectSpace::AllocationSampler#initialize(interval: 1)
 *
 * Accepts keyword arguments only.  `interval` sets how often (every
 * Nth allocation) a sample is captured; when omitted the default of 1
 * set by `allocate` stands.
 */
static VALUE
initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE opts;
    trace_stats_t * stats;

    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    rb_scan_args(argc, argv, ":", &opts);
    if (!NIL_P(opts)) {
        ID ids[2];
        VALUE args[2];
        ids[0] = rb_intern("interval");
        rb_get_kwargs(opts, ids, 0, 1, args);

        if (args[0] != Qundef) {
            /* NOTE(review): stats->interval is size_t but the value
             * goes through NUM2INT, so intervals beyond int range
             * raise/truncate — confirm this limit is acceptable. */
            stats->interval = NUM2INT(args[0]);
        }
    }

    return self;
}
492
+
493
/* ObjectSpace::AllocationSampler#interval — the sampling interval. */
static VALUE
interval(VALUE self)
{
    trace_stats_t * stats;
    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    return INT2NUM(stats->interval);
}
500
+
501
/* ObjectSpace::AllocationSampler#allocation_count — total NEWOBJ
 * events observed while enabled (sampled or not). */
static VALUE
allocation_count(VALUE self)
{
    trace_stats_t * stats;
    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    return INT2NUM(stats->allocation_count);
}
508
+
509
/* ObjectSpace::AllocationSampler#overall_samples — number of stack
 * records actually captured. */
static VALUE
overall_samples(VALUE self)
{
    trace_stats_t * stats;
    TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats);
    return INT2NUM(stats->overall_samples);
}
516
+
517
/* Extension entry point: defines ObjectSpace::AllocationSampler and
 * wires up its allocator and instance methods. */
void
Init_allocation_sampler(void)
{
    VALUE rb_mObjSpace = rb_const_get(rb_cObject, rb_intern("ObjectSpace"));

    rb_cAllocationSampler = rb_define_class_under(rb_mObjSpace, "AllocationSampler", rb_cObject);
    rb_define_alloc_func(rb_cAllocationSampler, allocate);
    rb_define_method(rb_cAllocationSampler, "initialize", initialize, -1);
    rb_define_method(rb_cAllocationSampler, "enable", enable, 0);
    rb_define_method(rb_cAllocationSampler, "disable", disable, 0);
    rb_define_method(rb_cAllocationSampler, "frames", frames, 0);
    rb_define_method(rb_cAllocationSampler, "samples", samples, 0);
    rb_define_method(rb_cAllocationSampler, "interval", interval, 0);
    rb_define_method(rb_cAllocationSampler, "allocation_count", allocation_count, 0);
    rb_define_method(rb_cAllocationSampler, "overall_samples", overall_samples, 0);
}
@@ -0,0 +1,3 @@
1
# Build script for the allocation_sampler C extension.
require 'mkmf'
# The C source mixes declarations and statements (C99 style); silence
# -Wdeclaration-after-statement warnings some toolchains enable.
$CFLAGS << " -Wno-declaration-after-statement"
create_makefile('allocation_sampler')
@@ -0,0 +1,226 @@
1
+ /* Obtained from https://github.com/noporpoise/sort_r on 2018-09-19 */
2
+ /* Isaac Turner 29 April 2014 Public Domain */
3
+ #ifndef SORT_R_H_
4
+ #define SORT_R_H_
5
+
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ /*
10
+
11
+ sort_r function to be exported.
12
+
13
+ Parameters:
14
+ base is the array to be sorted
15
+ nel is the number of elements in the array
16
+ width is the size in bytes of each element of the array
17
+ compar is the comparison function
18
+ arg is a pointer to be passed to the comparison function
19
+
20
+ void sort_r(void *base, size_t nel, size_t width,
21
+ int (*compar)(const void *_a, const void *_b, void *_arg),
22
+ void *arg);
23
+
24
+ */
25
+
26
+ #define _SORT_R_INLINE inline
27
+
28
+ #if (defined __APPLE__ || defined __MACH__ || defined __DARWIN__ || \
29
+ defined __FreeBSD__ || defined __DragonFly__)
30
+ # define _SORT_R_BSD
31
+ #elif (defined _GNU_SOURCE || defined __gnu_hurd__ || defined __GNU__ || \
32
+ defined __linux__ || defined __MINGW32__ || defined __GLIBC__)
33
+ # define _SORT_R_LINUX
34
+ #elif (defined _WIN32 || defined _WIN64 || defined __WINDOWS__)
35
+ # define _SORT_R_WINDOWS
36
+ # undef _SORT_R_INLINE
37
+ # define _SORT_R_INLINE __inline
38
+ #else
39
+ /* Using our own recursive quicksort sort_r_simple() */
40
+ #endif
41
+
42
+ #if (defined NESTED_QSORT && NESTED_QSORT == 0)
43
+ # undef NESTED_QSORT
44
+ #endif
45
+
46
+ /* swap a, b iff a>b */
47
+ /* __restrict is same as restrict but better support on old machines */
48
+ static _SORT_R_INLINE int sort_r_cmpswap(char *__restrict a, char *__restrict b, size_t w,
49
+ int (*compar)(const void *_a, const void *_b,
50
+ void *_arg),
51
+ void *arg)
52
+ {
53
+ char tmp, *end = a+w;
54
+ if(compar(a, b, arg) > 0) {
55
+ for(; a < end; a++, b++) { tmp = *a; *a = *b; *b = tmp; }
56
+ return 1;
57
+ }
58
+ return 0;
59
+ }
60
+
61
+ /* Implement recursive quicksort ourselves */
62
+ /* Note: quicksort is not stable, equivalent values may be swapped */
63
+ static _SORT_R_INLINE void sort_r_simple(void *base, size_t nel, size_t w,
64
+ int (*compar)(const void *_a, const void *_b,
65
+ void *_arg),
66
+ void *arg)
67
+ {
68
+ char *b = (char *)base, *end = b + nel*w;
69
+ if(nel < 7) {
70
+ /* Insertion sort for arbitrarily small inputs */
71
+ char *pi, *pj;
72
+ for(pi = b+w; pi < end; pi += w) {
73
+ for(pj = pi; pj > b && sort_r_cmpswap(pj-w,pj,w,compar,arg); pj -= w) {}
74
+ }
75
+ }
76
+ else
77
+ {
78
+ /* nel > 6; Quicksort */
79
+
80
+ /* Use median of first, middle and last items as pivot */
81
+ char *x, *y, *xend, ch;
82
+ char *pl, *pr;
83
+ char *last = b+w*(nel-1), *tmp;
84
+ char *l[3];
85
+ l[0] = b;
86
+ l[1] = b+w*(nel/2);
87
+ l[2] = last;
88
+
89
+ if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; }
90
+ if(compar(l[1],l[2],arg) > 0) {
91
+ tmp=l[1]; l[1]=l[2]; l[2]=tmp; /* swap(l[1],l[2]) */
92
+ if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; }
93
+ }
94
+
95
+ /* swap l[id], l[2] to put pivot as last element */
96
+ for(x = l[1], y = last, xend = x+w; x<xend; x++, y++) {
97
+ ch = *x; *x = *y; *y = ch;
98
+ }
99
+
100
+ pl = b;
101
+ pr = last;
102
+
103
+ while(pl < pr) {
104
+ for(; pl < pr; pl += w) {
105
+ if(sort_r_cmpswap(pl, pr, w, compar, arg)) {
106
+ pr -= w; /* pivot now at pl */
107
+ break;
108
+ }
109
+ }
110
+ for(; pl < pr; pr -= w) {
111
+ if(sort_r_cmpswap(pl, pr, w, compar, arg)) {
112
+ pl += w; /* pivot now at pr */
113
+ break;
114
+ }
115
+ }
116
+ }
117
+
118
+ sort_r_simple(b, (pl-b)/w, w, compar, arg);
119
+ sort_r_simple(pl+w, (end-(pl+w))/w, w, compar, arg);
120
+ }
121
+ }
122
+
123
+
124
+ #if defined NESTED_QSORT
125
+
126
+ static _SORT_R_INLINE void sort_r(void *base, size_t nel, size_t width,
127
+ int (*compar)(const void *_a, const void *_b,
128
+ void *aarg),
129
+ void *arg)
130
+ {
131
+ int nested_cmp(const void *a, const void *b)
132
+ {
133
+ return compar(a, b, arg);
134
+ }
135
+
136
+ qsort(base, nel, width, nested_cmp);
137
+ }
138
+
139
+ #else /* !NESTED_QSORT */
140
+
141
+ /* Declare structs and functions */
142
+
143
+ #if defined _SORT_R_BSD
144
+
145
+ /* Ensure qsort_r is defined */
146
+ extern void qsort_r(void *base, size_t nel, size_t width, void *thunk,
147
+ int (*compar)(void *_thunk, const void *_a, const void *_b));
148
+
149
+ #endif
150
+
151
+ #if defined _SORT_R_BSD || defined _SORT_R_WINDOWS
152
+
153
+ /* BSD (qsort_r), Windows (qsort_s) require argument swap */
154
+
155
+ struct sort_r_data
156
+ {
157
+ void *arg;
158
+ int (*compar)(const void *_a, const void *_b, void *_arg);
159
+ };
160
+
161
+ static _SORT_R_INLINE int sort_r_arg_swap(void *s, const void *a, const void *b)
162
+ {
163
+ struct sort_r_data *ss = (struct sort_r_data*)s;
164
+ return (ss->compar)(a, b, ss->arg);
165
+ }
166
+
167
+ #endif
168
+
169
+ #if defined _SORT_R_LINUX
170
+
171
+ typedef int(* __compar_d_fn_t)(const void *, const void *, void *);
172
+ extern void qsort_r(void *base, size_t nel, size_t width,
173
+ __compar_d_fn_t __compar, void *arg)
174
+ __attribute__((nonnull (1, 4)));
175
+
176
+ #endif
177
+
178
+ /* implementation */
179
+
180
+ static _SORT_R_INLINE void sort_r(void *base, size_t nel, size_t width,
181
+ int (*compar)(const void *_a, const void *_b, void *_arg),
182
+ void *arg)
183
+ {
184
+ #if defined _SORT_R_LINUX
185
+
186
+ #if defined __GLIBC__ && ((__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
187
+
188
+ /* no qsort_r in glibc before 2.8, need to use nested qsort */
189
+ sort_r_simple(base, nel, width, compar, arg);
190
+
191
+ #else
192
+
193
+ qsort_r(base, nel, width, compar, arg);
194
+
195
+ #endif
196
+
197
+ #elif defined _SORT_R_BSD
198
+
199
+ struct sort_r_data tmp;
200
+ tmp.arg = arg;
201
+ tmp.compar = compar;
202
+ qsort_r(base, nel, width, &tmp, sort_r_arg_swap);
203
+
204
+ #elif defined _SORT_R_WINDOWS
205
+
206
+ struct sort_r_data tmp;
207
+ tmp.arg = arg;
208
+ tmp.compar = compar;
209
+ qsort_s(base, nel, width, sort_r_arg_swap, &tmp);
210
+
211
+ #else
212
+
213
+ /* Fall back to our own quicksort implementation */
214
+ sort_r_simple(base, nel, width, compar, arg);
215
+
216
+ #endif
217
+ }
218
+
219
+ #endif /* !NESTED_QSORT */
220
+
221
+ #undef _SORT_R_INLINE
222
+ #undef _SORT_R_WINDOWS
223
+ #undef _SORT_R_LINUX
224
+ #undef _SORT_R_BSD
225
+
226
+ #endif /* SORT_R_H_ */
@@ -0,0 +1,210 @@
1
+ require 'allocation_sampler.so'
2
+ require 'delegate'
3
+ require 'set'
4
+ require 'cgi/escape'
5
+
6
+ module ObjectSpace
7
+ class AllocationSampler
8
+ VERSION = '1.0.0'
9
+
10
    # Immutable description of one stack frame, as reported by the C
    # extension (keyed by the frame's object id).
    class Frame
      attr_reader :id, :name, :path, :children

      # id::   frame identifier (Integer object id from the C ext)
      # name:: full method label for the frame
      # path:: source file path
      #
      # NOTE(review): @children is exposed by attr_reader but never
      # assigned here, so it always returns nil on this class; only
      # Result::Frame (the delegate wrapper) maintains children.
      def initialize id, name, path
        @id = id
        @name = name
        @path = path
      end
    end
19
+
20
+ class Result
21
      # Mutable call-tree node used while assembling results.  Wraps an
      # immutable AllocationSampler::Frame via DelegateClass and adds
      # per-node sample counters plus a set of child nodes.
      class Frame < DelegateClass(AllocationSampler::Frame)
        attr_reader :line, :children
        attr_accessor :samples, :total_samples

        include Enumerable

        # frame::   the AllocationSampler::Frame to wrap
        # line::    line number where the allocation was observed
        # samples:: initial self-sample count for this node
        def initialize frame, line, samples
          super(frame)
          @line = line
          @samples = samples
          @total_samples = 0
          @children = Set.new
        end

        # Depth-first traversal over this node and every reachable
        # child.  The `seen` hash guards against cycles (recursion in
        # the profiled program produces cyclic child links).
        def each
          seen = {}
          stack = [self]

          while node = stack.pop
            next if seen[node]
            seen[node] = true
            yield node
            stack.concat node.children.to_a
          end
        end

        # Render the subtree rooted here as a Graphviz digraph; edge
        # width scales with each child's share of total_samples.
        def to_dot
          seen = {}
          "digraph allocations {\n" +
            "  node[shape=record];\n" + print_edges(self, seen, total_samples) + "}\n"
        end

        private

        # Emit one node declaration plus edges to each child, recursing
        # into the children.  `seen` is keyed by frame id so shared
        # subtrees are printed only once.
        def print_edges node, seen, total_samples
          return '' if seen[node.id]
          seen[node.id] = node
          "  #{node.id} [label=\"#{CGI.escapeHTML node.name}\"];\n" +
            node.children.map { |child|
              ratio = child.total_samples / total_samples.to_f
              width = (1 * ratio) + 1
              "  #{node.id} -> #{child.id} [penwidth=#{width}];\n" + print_edges(child, seen, total_samples)
            }.join
        end
      end
66
+
67
+ attr_reader :samples, :frames
68
+
69
+ def initialize samples, frames
70
+ @samples = samples.sort_by! { |s| s[1] }.reverse!
71
+ @frames = frames
72
+ end
73
+
74
+ def allocations_by_type
75
+ @samples.each_with_object(Hash.new(0)) do |(type, count, _), h|
76
+ h[type] += count
77
+ end
78
+ end
79
+
80
      # Nested counts keyed by type name, then source path, then line:
      #   { "Object" => { "foo.rb" => { 12 => 3 } } }
      # Only the innermost (allocating) frame of each stack is used.
      def allocations_with_top_frame
        @samples.each_with_object({}) do |(type, count, stack), h|
          top_frame_id, line = stack.first
          frame = @frames[top_frame_id]
          ((h[type] ||= {})[frame.path] ||= {})[line] = count
        end
      end
87
+
88
      # Merge every sampled stack into a single caller->callee tree and
      # return its root node.
      #
      # NOTE(review): every iteration returns a tree root; because all
      # stacks share one frame_delegates cache the roots coincide and
      # `.uniq.first` collapses them.  This presumes every stack shares
      # the same outermost frame — confirm for multi-entry programs.
      def calltree
        frame_delegates = {}
        @samples.map { |type, count, stack|
          build_tree(stack, count, frame_delegates)
        }.uniq.first
      end
94
+
95
      # Like #calltree, but grouped per allocated type:
      #   { type_name => tree_root }
      # Each type gets its own delegate cache, so the counters in one
      # type's tree are independent of the others.
      def by_type_with_call_tree
        types_with_stacks = @samples.group_by(&:first)
        types_with_stacks.transform_values do |stacks|
          frame_delegates = {}
          stacks.map { |_, count, stack|
            build_tree(stack, count, frame_delegates)
          }.uniq.first
        end
      end
104
+
105
+ private
106
+
107
      # Fold one stack (innermost-first [frame_id, line] pairs) into the
      # shared tree held in `frame_delegates`, crediting `count` samples.
      #
      # * self-time (`samples`) goes only to the top (allocating) frame;
      # * `total_samples` is added to every distinct frame in the stack
      #   (the `seen` set avoids double-counting recursive frames);
      # * returns the root (outermost) frame of this stack.
      def build_tree stack, count, frame_delegates
        top_down = stack.reverse
        last_caller = nil
        seen = Set.new
        root = nil
        top_frame_id, top_line = stack.first
        top = frame_delegates[top_frame_id] ||= build_frame(top_frame_id, top_line, 0)
        top.samples += count
        top_down.each do |frame_id, line|
          frame = frame_delegates[frame_id] ||= build_frame(frame_id, line, 0)
          root ||= frame
          if last_caller
            last_caller.children << frame
          end
          last_caller = frame
          last_caller.total_samples += count unless seen.include?(frame_id)
          seen << frame_id
        end
        root
      end
127
+
128
      # Wrap the raw Frame for `id` in a mutable Result::Frame node.
      def build_frame id, line, samples
        Frame.new @frames[id], line, samples
      end
131
+ end
132
+
133
    # Snapshot the collected samples and frames into a Result for
    # aggregation and display.
    def result
      Result.new samples, frames
    end
136
+
137
+ module Display
138
+ class Stack < DelegateClass(IO)
139
+ attr_reader :max_depth
140
+
141
        # output:: IO the tree is written to (defaults to $stdout); all
        #          IO methods are delegated to it.
        #
        # NOTE(review): max_depth is exposed by attr_reader but never
        # assigned anywhere in this class, so it always returns nil.
        def initialize output: $stdout
          super(output)
        end
144
+
145
        # Render the call tree rooted at `frames` to the output IO,
        # right-aligning the sample columns to the widest row.
        def show frames
          max_width = max_width(frames, 0, {})
          display(frames, 0, frames.total_samples, [], {}, max_width)
        end
149
+
150
+ private
151
+
152
+ def max_width frame, depth, seen
153
+ if seen.key? frame
154
+ return 0
155
+ end
156
+
157
+ seen[frame] = true
158
+
159
+ my_length = (depth * 4) + frame.name.length
160
+
161
+ frame.children.each do |caller|
162
+ child_len = max_width caller, depth + 1, seen
163
+ my_length = child_len if my_length < child_len
164
+ end
165
+
166
+ my_length
167
+ end
168
+
169
+ def display frame, depth, total_samples, last_stack, seen, max_width
170
+ seen[frame] = true
171
+
172
+
173
+ buffer = max_width - ((depth * 4) + frame.name.length)
174
+
175
+ self_samples = frame.samples
176
+ last_stack.each_with_index do |last, i|
177
+ if i == last_stack.length - 1
178
+ if last
179
+ printf "`-- "
180
+ else
181
+ printf "|-- "
182
+ end
183
+ else
184
+ if last
185
+ printf " "
186
+ else
187
+ printf "| "
188
+ end
189
+ end
190
+ end
191
+
192
+
193
+ printf frame.name
194
+ printf " " * buffer
195
+ printf "% d % 8s", self_samples, "(%2.1f%%)" % (self_samples*100.0/total_samples)
196
+ puts
197
+
198
+ children = (frame.children || []).sort_by { |ie|
199
+ -ie.total_samples
200
+ }.reject { |f| seen[f] }
201
+
202
+ children.each_with_index do |child, i|
203
+ s = last_stack + [i == children.length - 1]
204
+ display child, depth + 1, total_samples, s, seen, max_width
205
+ end
206
+ end
207
+ end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,170 @@
1
+ require 'minitest/autorun'
2
+ require 'allocation_sampler'
3
+
4
+ class TestAllocationSampler < Minitest::Test
5
+ def test_initialize
6
+ assert ObjectSpace::AllocationSampler.new
7
+ end
8
+
9
+ def test_init_with_params
10
+ as = ObjectSpace::AllocationSampler.new(interval: 10)
11
+ assert_equal 10, as.interval
12
+ end
13
+
14
+ def test_init_with_location
15
+ iseq = RubyVM::InstructionSequence.new <<-eoruby
16
+ Object.new
17
+ Object.new
18
+ eoruby
19
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
20
+ as.enable
21
+ iseq.eval
22
+ as.disable
23
+
24
+ assert_equal({"Object"=>{"<compiled>"=>{1=>1, 2=>1}}}, filter(as.result))
25
+ end
26
+
27
+ def test_location_same_line
28
+ iseq = RubyVM::InstructionSequence.new <<-eoruby
29
+ 10.times { Object.new }
30
+ eoruby
31
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
32
+ as.enable
33
+ iseq.eval
34
+ as.disable
35
+
36
+ assert_equal({"Object"=>{"<compiled>"=>{1=>10}}}, filter(as.result))
37
+ end
38
+
39
+ def test_location_mixed
40
+ iseq = RubyVM::InstructionSequence.new <<-eoruby
41
+ 10.times { Object.new }
42
+ Object.new
43
+ eoruby
44
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
45
+ as.enable
46
+ iseq.eval
47
+ as.disable
48
+
49
+ assert_equal({"Object"=>{"<compiled>"=>{1=>10, 2=>1}}}, filter(as.result))
50
+ end
51
+
52
+ def test_location_from_method
53
+ iseq = RubyVM::InstructionSequence.new <<-eoruby
54
+ def foo
55
+ 10.times { Object.new }
56
+ Object.new
57
+ end
58
+ foo
59
+ eoruby
60
+ as = ObjectSpace::AllocationSampler.new(interval: 1)
61
+ as.enable
62
+ iseq.eval
63
+ as.disable
64
+
65
+ assert_equal({"Object"=>{"<compiled>"=>{2=>10, 3=>1}}}, filter(as.result))
66
+ end
67
+
68
+ def test_location_larger_interval
69
+ iseq = RubyVM::InstructionSequence.new <<-eom
70
+ 100.times { Object.new }
71
+ 100.times { Object.new }
72
+ eom
73
+ as = ObjectSpace::AllocationSampler.new(interval: 10)
74
+ as.enable
75
+ iseq.eval
76
+ as.disable
77
+
78
+ assert_equal({"Object"=>{"<compiled>"=>{1=>10, 2=>10}}}, filter(as.result))
79
+ assert_equal 201, as.allocation_count
80
+ end
81
+
82
+ def test_interval_default
83
+ as = ObjectSpace::AllocationSampler.new
84
+ assert_equal 1, as.interval
85
+ end
86
+
87
+ def test_two_with_same_type
88
+ as = ObjectSpace::AllocationSampler.new
89
+ as.enable
90
+ Object.new
91
+ Object.new
92
+ as.disable
93
+
94
+ assert_equal(2, filter(as.result)[Object.name].values.flat_map(&:values).inject(:+))
95
+ end
96
+
97
+ def test_two_with_same_type_same_line
98
+ as = ObjectSpace::AllocationSampler.new
99
+ as.enable
100
+ Object.new; Object.new
101
+ Object.new; Object.new
102
+ as.disable
103
+
104
+ assert_equal 4, as.result.allocations_by_type[Object.name]
105
+ end
106
+
107
+ class X
108
+ end
109
+
110
+ def test_expands
111
+ as = ObjectSpace::AllocationSampler.new
112
+ as.enable
113
+ 500.times do
114
+ Object.new
115
+ X.new
116
+ end
117
+ Object.new
118
+ as.disable
119
+
120
+ result = as.result
121
+ assert_equal 501, result.allocations_by_type[Object.name]
122
+ assert_equal 500, result.allocations_by_type[TestAllocationSampler::X.name]
123
+ end
124
+
125
+ def d
126
+ Object.new
127
+ end
128
+ def c; 5.times { d }; end
129
+ def b; 5.times { c }; end
130
+ def a; 5.times { b }; end
131
+
132
+ def test_stack_trace
133
+ as = ObjectSpace::AllocationSampler.new
134
+ buffer = StringIO.new
135
+ stack_printer = ObjectSpace::AllocationSampler::Display::Stack.new(
136
+ output: buffer
137
+ )
138
+ as.enable
139
+ a
140
+ as.disable
141
+
142
+ as.result.by_type_with_call_tree.each do |class_name, tree|
143
+ assert_equal Object.name, class_name
144
+ root = tree.find { |node| node.name.include? __method__.to_s }
145
+ stack_printer.show root
146
+ end
147
+ assert_equal <<-eoout, buffer.string
148
+ TestAllocationSampler#test_stack_trace 0 (0.0%)
149
+ `-- TestAllocationSampler#a 0 (0.0%)
150
+ `-- TestAllocationSampler#b 0 (0.0%)
151
+ `-- TestAllocationSampler#c 0 (0.0%)
152
+ `-- TestAllocationSampler#d 125 (100.0%)
153
+ eoout
154
+ end
155
+
156
+ def test_dot
157
+ as = ObjectSpace::AllocationSampler.new
158
+ as.enable
159
+ a
160
+ as.disable
161
+
162
+ File.write 'out.dot', as.result.calltree.to_dot
163
+ end
164
+
165
+ private
166
+
167
+ def filter result
168
+ result.allocations_with_top_frame
169
+ end
170
+ end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: allocation_sampler
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Aaron Patterson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-01-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ - - "<"
35
+ - !ruby/object:Gem::Version
36
+ version: '7'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '4.0'
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '7'
47
+ - !ruby/object:Gem::Dependency
48
+ name: hoe
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.17'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.17'
61
+ description: |-
62
+ A sampling allocation profiler. This keeps track of allocations, but only on
63
+ specified intervals. Useful for profiling allocations in programs where there
64
+ is a time limit on completion of the program.
65
+ email:
66
+ - aaron@tenderlovemaking.com
67
+ executables: []
68
+ extensions:
69
+ - ext/allocation_sampler/extconf.rb
70
+ extra_rdoc_files:
71
+ - CHANGELOG.md
72
+ - Manifest.txt
73
+ - README.md
74
+ files:
75
+ - CHANGELOG.md
76
+ - Manifest.txt
77
+ - README.md
78
+ - Rakefile
79
+ - ext/allocation_sampler/allocation_sampler.c
80
+ - ext/allocation_sampler/extconf.rb
81
+ - ext/allocation_sampler/sort_r.h
82
+ - lib/allocation_sampler.rb
83
+ - test/test_allocation_sampler.rb
84
+ homepage: https://github.com/tenderlove/allocation_sampler
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options:
90
+ - "--main"
91
+ - README.md
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubygems_version: 3.0.2
106
+ signing_key:
107
+ specification_version: 4
108
+ summary: A sampling allocation profiler
109
+ test_files: []