mwrap 1.0.0 → 2.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: '049853543dbde8f93776318db059ac2d40ee82a241d080a8236a7d43efade454'
- data.tar.gz: a29b74fef93affeb9ffa30276fe9f75f9f5952cbfae142a749ba1b9409153789
+ metadata.gz: 8559afec1946b7f545c944085aac5e205601deb82fa9f1529785dd3ef7526e5a
+ data.tar.gz: 34ea90410103ec59f367baa00e1e4df6424fe67af8608b5c7a3a0cb66fec2440
  SHA512:
- metadata.gz: 52bad7de38eb6b256b1cecbb9a7a83da094602adf4162730fed0d58ff3fa3f605fcef77e724b3a3e8557bbbce597a9369d3673f2be2d176ddc841dc2cc3c2208
- data.tar.gz: 2b446a6a8ceef8cc5b7a34211fc2d7cd07e50692bcca6fe4a13d115b359b8fde7042ae1a6f7c53829f2b65fb9fd4158492e1dd142633a30c672eccb6120a1d71
+ metadata.gz: ff91f6f250e7a18cb465a6dc04ddee374411a10a92cd1b3227e09a4ccd0be1b8597f475f71d5061bd5bc36e8c0418ce3bf6faa3a78b37aec5d21b0340f736031
+ data.tar.gz: 06311f2949ae4dae6bd2d0e95e6b0d147b44b6bae6cd4464e4c1d160fc1f5f3f99b2a6a3a3e711a10b8894da3bdb6220ccfeb72dfb18545170edbbc65dbad225
@@ -0,0 +1,2 @@
+ ext/mwrap/mwrap.c
+ lib/mwrap_rack.rb
data/MANIFEST CHANGED
@@ -8,5 +8,6 @@ bin/mwrap
  ext/mwrap/extconf.rb
  ext/mwrap/jhash.h
  ext/mwrap/mwrap.c
+ lib/mwrap_rack.rb
  mwrap.gemspec
  test/test_mwrap.rb
data/README CHANGED
@@ -4,20 +4,23 @@ mwrap is designed to answer the question:
 
  Which lines of Ruby are hitting malloc the most?
 
- mwrap wraps all malloc, calloc, and realloc calls to trace the Ruby
- source location of such calls and bytes allocated at each callsite.
- This functionality may be expanded in the future.
-
- It does not track allocation lifetimes, or frees, however. It works
- best for allocations under GVL, but tries to track numeric caller
- addresses for allocations made without GVL so you can get an idea of how
- much memory usage certain extensions and native libraries use.
+ mwrap wraps all malloc-family calls to trace the Ruby source
+ location of such calls and bytes allocated at each callsite.
+ As of mwrap 2.0.0, it can also function as a leak detector
+ and show live allocations at every call site. Depending on
+ your application and workload, the overhead is roughly a 50%
+ increase in memory and runtime.
+
+ It works best for allocations under GVL, but tries to track
+ numeric caller addresses for allocations made without GVL so you
+ can get an idea of how much memory usage certain extensions and
+ native libraries use.
 
  It requires the concurrent lock-free hash table from the
  Userspace RCU project: https://liburcu.org/
 
- It does not require recompiling or rebuilding Ruby, but only supports
- Ruby trunk (2.6.0dev+) on a few platforms:
+ It does not require recompiling or rebuilding Ruby, but only
+ supports Ruby trunk (2.6.0dev+) on a few platforms:
 
  * GNU/Linux
  * FreeBSD (tested 11.1)
@@ -36,28 +39,35 @@ It may work on NetBSD, OpenBSD and DragonFly BSD.
  == Usage
 
  mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to
- improve ease-of-use. You can set dump_fd: in the MWRAP environment
- variable to dump the results to a certain file descriptor at exit:
-
- # Dump results to stderr at exit:
- MWRAP=dump_fd:2 mwrap RUBY_COMMAND
-
- You may also set dump_path to append to a log file:
+ improve ease-of-use. You can set dump_path: in the MWRAP environment
+ variable to append the results to a log file:
 
  MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND
 
- You may also `require 'mwrap'' in your Ruby code and use
- Mwrap.dump, Mwrap.clear, Mwrap.reset, Mwrap.each, etc.
+ # And to display the locations with the most allocations:
+ sort -k1,1rn </path/to/log | $PAGER
+
+ You may also `require "mwrap"' in your Ruby code and use
+ Mwrap.dump, Mwrap.reset, Mwrap.each, etc.
 
  However, mwrap MUST be loaded via LD_PRELOAD to have any
- effect in tracking malloc use.
+ effect in tracking malloc use. That said, it is safe to keep
+ "require 'mwrap'" in performance-critical deployments,
+ as overhead is only incurred when used as an LD_PRELOAD.
 
  The output of the mwrap dump is a text file with 3 columns:
 
  total_bytes call_count location
 
  Where location is a Ruby source location (if made under GVL)
- or an address retrieved by backtrace_symbols(3)
+ or an address retrieved by backtrace_symbols(3). It is
+ recommended to use the sort(1) command on either of the
+ first two columns to find the hottest malloc locations.
+
+ mwrap 2.0.0+ also supports a Rack application endpoint;
+ it is documented at:
+
+ https://80x24.org/mwrap/MwrapRack.html
 
  == Known problems
 
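To make the README workflow above concrete, here is a minimal, hedged Ruby sketch of driving the dump API once a process has been started through the `mwrap` wrapper; the log path, the 4096-byte threshold, and the argument order of Mwrap.dump are illustrative assumptions, not part of the diff itself:

```ruby
# Illustrative only: assumes the process was launched as
#   MWRAP=dump_path:/path/to/log mwrap ruby my_script.rb
require 'mwrap'

# Dump current counters to a writable IO (stderr here), skipping
# locations whose total allocated bytes are 4096 or fewer:
Mwrap.dump($stderr, 4096)

# Zero all counters between benchmark iterations:
Mwrap.reset
```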
data/bin/mwrap CHANGED
@@ -26,4 +26,11 @@ if ENV['MWRAP'] =~ /dump_fd:(\d+)/
  opts[dump_fd] = dump_io
  end
  end
+
+ # allow inheriting FDs from systemd
+ n = ENV['LISTEN_FDS']
+ if n && ENV['LISTEN_PID'].to_i == $$
+ n = 3 + n.to_i
+ (3...n).each { |fd| opts[fd] = IO.new(fd) }
+ end
  exec *ARGV, opts
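The new block above lets descriptors passed in by systemd be referenced from MWRAP=dump_fd:. A hedged Ruby restatement of the underlying sd_listen_fds(3) convention the snippet relies on (the dump_fd pairing is an assumption about how one might use it, not code from the gem):

```ruby
# systemd sets LISTEN_PID to the spawned process's PID and LISTEN_FDS to
# the number of descriptors it passed, numbered consecutively from 3.
if ENV['LISTEN_FDS'] && ENV['LISTEN_PID'].to_i == Process.pid
  first = 3
  last  = first + ENV['LISTEN_FDS'].to_i - 1
  inherited = (first..last).map { |fd| IO.new(fd) }
  # e.g. launching with MWRAP=dump_fd:3 would then dump into the first
  # inherited descriptor at exit.
end
```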
data/ext/mwrap/extconf.rb CHANGED
@@ -10,4 +10,19 @@ have_library 'urcu-bp' or abort 'liburcu-bp not found'
  have_library 'dl'
  have_library 'c'
  have_library 'execinfo' # FreeBSD
+
+ if try_link(<<'')
+ int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
+
+ $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+ end
+
+ if try_link(<<'')
+ int main(int a) { return __builtin_add_overflow(0,0,&a); }
+
+ $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+ else
+ abort 'missing __builtin_add_overflow'
+ end
+
  create_makefile 'mwrap'
data/ext/mwrap/mwrap.c CHANGED
@@ -16,26 +16,40 @@
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
+ #include <pthread.h>
  #include <urcu-bp.h>
  #include <urcu/rculfhash.h>
+ #include <urcu/rculist.h>
  #include "jhash.h"
 
  static ID id_uminus;
+ static unsigned int track_memalign;
  const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
- static int *(*has_gvl_p)(void);
+ extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
+ extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
+ extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
+ extern size_t __attribute__((weak)) rb_gc_count(void);
+ extern VALUE __attribute__((weak)) rb_cObject;
+ extern VALUE __attribute__((weak)) rb_yield(VALUE);
+
+ /* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+ #define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
+
+ int __attribute__((weak)) ruby_thread_has_gvl_p(void)
+ {
+ return 0;
+ }
+
  #ifdef __FreeBSD__
  void *__malloc(size_t);
- void *__calloc(size_t, size_t);
- void *__realloc(void *, size_t);
- static void *(*real_malloc)(size_t) = __malloc;
- static void *(*real_calloc)(size_t, size_t) = __calloc;
- static void *(*real_realloc)(void *, size_t) = __realloc;
- # define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+ void __free(void *);
+ # define real_malloc __malloc
+ # define real_free __free
  #else
- static int ready;
  static void *(*real_malloc)(size_t);
- static void *(*real_calloc)(size_t, size_t);
- static void *(*real_realloc)(void *, size_t);
+ static void (*real_free)(void *);
+ static int resolving_malloc;
+ #endif /* !FreeBSD */
 
  /*
  * we need to fake an OOM condition while dlsym is running,
@@ -43,22 +57,33 @@ static void *(*real_realloc)(void *, size_t);
  * symbol for the jemalloc calloc, yet
  */
  # define RETURN_IF_NOT_READY() do { \
- if (!ready) { \
+ if (!real_malloc) { \
  errno = ENOMEM; \
  return NULL; \
  } \
  } while (0)
 
- #endif /* !FreeBSD */
+ static __thread size_t locating;
+ static size_t generation;
+ static size_t page_size;
+ static struct cds_lfht *totals;
+ union padded_mutex {
+ pthread_mutex_t mtx;
+ char pad[64];
+ };
 
- /*
- * rb_source_location_cstr relies on GET_EC(), and it's possible
- * to have a native thread but no EC during the early and late
- * (teardown) phases of the Ruby process
- */
- static void **ec_loc;
+ /* a round-robin pool of mutexes */
+ #define MUTEX_NR (1 << 6)
+ #define MUTEX_MASK (MUTEX_NR - 1)
+ static size_t mutex_i;
+ static union padded_mutex mutexes[MUTEX_NR] = {
+ [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+ };
 
- static struct cds_lfht *totals;
+ static pthread_mutex_t *mutex_assign(void)
+ {
+ return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+ }
 
  static struct cds_lfht *
  lfht_new(void)
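The hunk above introduces a fixed pool of padded mutexes handed out round-robin to call sites (mutex_assign). Purely as an illustration of that assignment pattern, and not code from mwrap, a Ruby sketch of the same idea might look like this (the atomic increment is approximated with a plain counter):

```ruby
# Illustration only: a fixed pool of locks handed out round-robin,
# mirroring MUTEX_NR/MUTEX_MASK above. The pool size must stay a
# power of two for the bitmask trick to work.
MUTEX_NR   = 64
MUTEX_MASK = MUTEX_NR - 1
POOL = Array.new(MUTEX_NR) { Mutex.new }

counter = 0
mutex_assign = -> { POOL[(counter += 1) & MUTEX_MASK] }

mutex_assign.call.synchronize { :critical_section }
```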
@@ -69,19 +94,40 @@ lfht_new(void)
  __attribute__((constructor)) static void resolve_malloc(void)
  {
  int err;
+ const char *opt;
+ ++locating;
 
- #ifndef __FreeBSD__
- real_malloc = dlsym(RTLD_NEXT, "malloc");
- real_calloc = dlsym(RTLD_NEXT, "calloc");
- real_realloc = dlsym(RTLD_NEXT, "realloc");
- if (!real_calloc || !real_malloc || !real_realloc) {
- fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
- real_calloc, real_malloc, real_realloc);
+ #ifdef __FreeBSD__
+ /*
+ * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+ * which happens at pthread_mutex_lock, and that calls calloc
+ */
+ {
+ size_t i;
+
+ for (i = 0; i < MUTEX_NR; i++) {
+ err = pthread_mutex_init(&mutexes[i].mtx, 0);
+ if (err) {
+ fprintf(stderr, "error: %s\n", strerror(err));
+ _exit(1);
+ }
+ }
+ /* initialize mutexes used by urcu-bp */
+ rcu_read_lock();
+ rcu_read_unlock();
+ }
+ #else /* !FreeBSD (tested on GNU/Linux) */
+ if (!real_malloc) {
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+ real_free = dlsym(RTLD_NEXT, "free");
+ if (!real_malloc || !real_free) {
+ fprintf(stderr, "missing malloc/aligned_alloc/free\n"
+ "\t%p %p\n", real_malloc, real_free);
  _exit(1);
  }
- ready = 1;
- #endif
-
+ #endif /* !FreeBSD */
  totals = lfht_new();
  if (!totals)
  fprintf(stderr, "failed to allocate totals table\n");
@@ -91,14 +137,27 @@ __attribute__((constructor)) static void resolve_malloc(void)
  call_rcu_after_fork_child);
  if (err)
  fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+ page_size = sysconf(_SC_PAGESIZE);
+ opt = getenv("MWRAP");
+ if (opt && (opt = strstr(opt, "memalign:"))) {
+ if (!sscanf(opt, "memalign:%u", &track_memalign))
+ fprintf(stderr, "not an unsigned int: %s\n", opt);
+ }
+ --locating;
+ }
 
- has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p");
+ static void
+ mutex_lock(pthread_mutex_t *m)
+ {
+ int err = pthread_mutex_lock(m);
+ assert(err == 0);
+ }
 
- /*
- * resolve dynamically so it doesn't break when LD_PRELOAD-ed
- * into non-Ruby binaries
- */
- ec_loc = dlsym(RTLD_DEFAULT, "ruby_current_execution_context_ptr");
+ static void
+ mutex_unlock(pthread_mutex_t *m)
+ {
+ int err = pthread_mutex_unlock(m);
+ assert(err == 0);
  }
 
  #ifndef HAVE_MEMPCPY
@@ -114,8 +173,6 @@ my_mempcpy(void *dest, const void *src, size_t n)
  #define RETURN_ADDRESS(nr) \
  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
- static __thread size_t locating;
-
  #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
  static char *int2str(int num, char *dst, size_t * size)
  {
@@ -143,21 +200,58 @@ static char *int2str(int num, char *dst, size_t * size)
  return NULL;
  }
 
+ /*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
  static int has_ec_p(void)
  {
- return (ec_loc && *ec_loc);
+ return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
+ ruby_current_execution_context_ptr);
  }
 
+ /* allocated via real_malloc/real_free */
  struct src_loc {
- struct rcu_head rcu_head;
- size_t calls;
+ pthread_mutex_t *mtx;
  size_t total;
+ size_t allocations;
+ size_t frees;
+ size_t age_total; /* (age_total / frees) => mean age at free */
+ size_t max_lifespan;
  struct cds_lfht_node hnode;
+ struct cds_list_head allocs; /* <=> alloc_hdr.node */
  uint32_t hval;
  uint32_t capa;
  char k[];
  };
 
+ /* every allocation has this in the header, maintain alignment with malloc */
+ struct alloc_hdr {
+ struct cds_list_head anode; /* <=> src_loc.allocs */
+ union {
+ struct {
+ size_t gen; /* rb_gc_count() */
+ struct src_loc *loc;
+ } live;
+ struct rcu_head dead;
+ } as;
+ void *real; /* what to call real_free on */
+ size_t size;
+ };
+
+ static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
+
+ static struct alloc_hdr *ptr2hdr(void *p)
+ {
+ return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+ }
+
+ static void *hdr2ptr(struct alloc_hdr *h)
+ {
+ return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+ }
+
  static int loc_is_addr(const struct src_loc *l)
  {
  return l->capa == 0;
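The age_total/frees comment in the struct above is the basis for the mean-lifespan figure exposed later in this diff via Mwrap::SourceLocation#mean_lifespan. As a small Ruby restatement of that arithmetic (the field values and the numbers below are only illustrative):

```ruby
# Mean age (in GC generations) at free, mirroring the
# "(age_total / frees) => mean age at free" note above;
# infinite when nothing has been freed yet, like the C code.
def mean_age_at_free(age_total, frees)
  frees.zero? ? Float::INFINITY : age_total.fdiv(frees)
end

mean_age_at_free(120, 40)  # => 3.0
mean_age_at_free(0, 0)     # => Infinity
```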
@@ -180,16 +274,14 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
  memcmp(k->k, existing->k, loc_size(k)) == 0);
  }
 
- static void totals_add(struct src_loc *k)
+ static struct src_loc *totals_add_rcu(struct src_loc *k)
  {
  struct cds_lfht_iter iter;
  struct cds_lfht_node *cur;
- struct src_loc *l;
+ struct src_loc *l = 0;
  struct cds_lfht *t;
 
-
  again:
- rcu_read_lock();
  t = rcu_dereference(totals);
  if (!t) goto out_unlock;
  cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
@@ -197,44 +289,58 @@ again:
  if (cur) {
  l = caa_container_of(cur, struct src_loc, hnode);
  uatomic_add(&l->total, k->total);
- uatomic_add(&l->calls, 1);
+ uatomic_add(&l->allocations, 1);
  } else {
  size_t n = loc_size(k);
- l = malloc(sizeof(*l) + n);
+ l = real_malloc(sizeof(*l) + n);
  if (!l) goto out_unlock;
-
  memcpy(l, k, sizeof(*l) + n);
- l->calls = 1;
+ l->mtx = mutex_assign();
+ l->age_total = 0;
+ l->max_lifespan = 0;
+ l->frees = 0;
+ l->allocations = 1;
+ CDS_INIT_LIST_HEAD(&l->allocs);
  cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
  if (cur != &l->hnode) { /* lost race */
  rcu_read_unlock();
- free(l);
+ real_free(l);
+ rcu_read_lock();
  goto again;
  }
  }
  out_unlock:
- rcu_read_unlock();
+ return l;
+ }
+
+ static void update_stats_rcu_unlock(const struct src_loc *l)
+ {
+ if (caa_likely(l)) rcu_read_unlock();
  }
 
- static void update_stats(size_t size, uintptr_t caller)
+ static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
  {
- struct src_loc *k;
+ struct src_loc *k, *ret = 0;
  static const size_t xlen = sizeof(caller);
  char *dst;
 
+ if (caa_unlikely(!totals)) return 0;
  if (locating++) goto out; /* do not recurse into another *alloc */
 
- if (has_gvl_p && has_gvl_p() && has_ec_p()) {
+ rcu_read_lock();
+ if (has_ec_p()) {
  int line;
  const char *ptr = rb_source_location_cstr(&line);
  size_t len;
  size_t int_size = INT2STR_MAX;
 
+ generation = rb_gc_count();
+
  if (!ptr) goto unknown;
 
  /* avoid vsnprintf or anything which could call malloc here: */
  len = strlen(ptr);
- k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+ k = (void *)kbuf;
  k->total = size;
  dst = mempcpy(k->k, ptr, len);
  *dst++ = ':';
@@ -243,7 +349,7 @@ static void update_stats(size_t size, uintptr_t caller)
  *dst = 0; /* terminate string */
  k->capa = (uint32_t)(dst - k->k + 1);
  k->hval = jhash(k->k, k->capa, 0xdeadbeef);
- totals_add(k);
+ ret = totals_add_rcu(k);
  } else {
  rb_bug("bad math making key from location %s:%d\n",
  ptr, line);
@@ -255,36 +361,281 @@ unknown:
  memcpy(k->k, &caller, xlen);
  k->capa = 0;
  k->hval = jhash(k->k, xlen, 0xdeadbeef);
- totals_add(k);
+ ret = totals_add_rcu(k);
  }
  out:
  --locating;
+ return ret;
+ }
+
+ size_t malloc_usable_size(void *p)
+ {
+ return ptr2hdr(p)->size;
+ }
+
+ static void
+ free_hdr_rcu(struct rcu_head *dead)
+ {
+ struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+ real_free(h->real);
+ }
+
+ void free(void *p)
+ {
+ if (p) {
+ struct alloc_hdr *h = ptr2hdr(p);
+ struct src_loc *l = h->as.live.loc;
+
+ if (!real_free) return; /* oh well, leak a little */
+ if (l) {
+ size_t age = generation - h->as.live.gen;
+
+ uatomic_set(&h->size, 0);
+ uatomic_add(&l->frees, 1);
+ uatomic_add(&l->age_total, age);
+
+ mutex_lock(l->mtx);
+ cds_list_del_rcu(&h->anode);
+ if (age > l->max_lifespan)
+ l->max_lifespan = age;
+ mutex_unlock(l->mtx);
+
+ call_rcu(&h->as.dead, free_hdr_rcu);
+ }
+ else {
+ real_free(h->real);
+ }
+ }
+ }
+
+ static void
+ alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+ {
+ /* we need src_loc to remain alive for the duration of this call */
+ if (!h) return;
+ h->size = size;
+ h->real = real;
+ h->as.live.loc = l;
+ h->as.live.gen = generation;
+ if (l) {
+ mutex_lock(l->mtx);
+ cds_list_add_rcu(&h->anode, &l->allocs);
+ mutex_unlock(l->mtx);
+ }
+ }
+
+ static size_t size_align(size_t size, size_t alignment)
+ {
+ return ((size + (alignment - 1)) & ~(alignment - 1));
+ }
+
+ static bool ptr_is_aligned(void *ptr, size_t alignment)
+ {
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+ }
+
+ static void *ptr_align(void *ptr, size_t alignment)
+ {
+ return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+ }
+
+ static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+ static int
+ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+ {
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ void *real;
+ size_t asize;
+ size_t d = alignment / sizeof(void*);
+ size_t r = alignment % sizeof(void*);
+
+ if (!real_malloc) return ENOMEM;
+
+ if (r != 0 || d == 0 || !is_power_of_two(d))
+ return EINVAL;
+
+ if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+ void *p = malloc(size);
+ if (!p) return ENOMEM;
+ *pp = p;
+ return 0;
+ }
+ for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+ ; /* double alignment until >= sizeof(struct alloc_hdr) */
+ if (__builtin_add_overflow(size, alignment, &asize) ||
+ __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
+ return ENOMEM;
+
+ /* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
+ l = track_memalign ? update_stats_rcu_lock(size, caller) : 0;
+ real = real_malloc(asize);
+ if (real) {
+ void *p = hdr2ptr(real);
+ if (!ptr_is_aligned(p, alignment))
+ p = ptr_align(p, alignment);
+ h = ptr2hdr(p);
+ alloc_insert_rcu(l, h, size, real);
+ *pp = p;
+ }
+ update_stats_rcu_unlock(l);
+
+ return real ? 0 : ENOMEM;
+ }
+
+ static void *
+ memalign_result(int err, void *p)
+ {
+ if (caa_unlikely(err)) {
+ errno = err;
+ return 0;
+ }
+ return p;
+ }
+
+ void *memalign(size_t alignment, size_t size)
+ {
+ void *p;
+ int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+ }
+
+ int posix_memalign(void **p, size_t alignment, size_t size)
+ {
+ return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
+ }
+
+ void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+ void cfree(void *) __attribute__((alias("free")));
+
+ void *valloc(size_t size)
+ {
+ void *p;
+ int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+ }
+
+ #if __GNUC__ < 7
+ # define add_overflow_p(a,b) __extension__({ \
+ __typeof__(a) _c; \
+ __builtin_add_overflow(a,b,&_c); \
+ })
+ #else
+ # define add_overflow_p(a,b) \
+ __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+ #endif
+
+ void *pvalloc(size_t size)
+ {
+ size_t alignment = page_size;
+ void *p;
+ int err;
+
+ if (add_overflow_p(size, alignment)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ size = size_align(size, alignment);
+ err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
  }
 
- /*
- * Do we care for *memalign? ruby/gc.c uses it in ways this lib
- * doesn't care about, but maybe some gems use it, too.
- */
  void *malloc(size_t size)
  {
- RETURN_IF_NOT_READY();
- update_stats(size, RETURN_ADDRESS(0));
- return real_malloc(size);
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+ void *p;
+
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+ goto enomem;
+
+ /*
+ * Needed for C++ global declarations using "new",
+ * which happens before our constructor
+ */
+ #ifndef __FreeBSD__
+ if (!real_malloc) {
+ if (resolving_malloc) goto enomem;
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+ #endif
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (h) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
+ enomem:
+ errno = ENOMEM;
+ return 0;
  }
 
  void *calloc(size_t nmemb, size_t size)
  {
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (__builtin_mul_overflow(size, nmemb, &size)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
  RETURN_IF_NOT_READY();
- /* ruby_xcalloc already does overflow checking */
- update_stats(nmemb * size, RETURN_ADDRESS(0));
- return real_calloc(nmemb, size);
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ memset(p, 0, size);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
  }
 
  void *realloc(void *ptr, size_t size)
  {
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (!size) {
+ free(ptr);
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
  RETURN_IF_NOT_READY();
- update_stats(size, RETURN_ADDRESS(0));
- return real_realloc(ptr, size);
+
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+
+ if (ptr && p) {
+ struct alloc_hdr *old = ptr2hdr(ptr);
+ memcpy(p, ptr, old->size < size ? old->size : size);
+ free(ptr);
+ }
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
  }
 
  struct dump_arg {
@@ -314,7 +665,7 @@ static void *dump_to_file(void *x)
  p = s[0];
  }
  fprintf(a->fp, "%16zu %12zu %s\n",
- l->total, l->calls, (const char *)p);
+ l->total, l->allocations, (const char *)p);
  if (s) free(s);
  }
  out_unlock:
@@ -349,6 +700,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
  io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
 
  a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+ io = rb_io_get_io(io);
  io = rb_io_get_write_io(io);
  GetOpenFile(io, fptr);
  a.fp = rb_io_stdio_file(fptr);
@@ -358,49 +710,6 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
  return Qnil;
  }
 
- static void
- free_src_loc(struct rcu_head *head)
- {
- struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
- free(l);
- }
-
- static void *totals_clear(void *ign)
- {
- struct cds_lfht *new, *old;
- struct cds_lfht_iter iter;
- struct src_loc *l;
-
- new = lfht_new();
- rcu_read_lock();
- old = rcu_dereference(totals);
- rcu_assign_pointer(totals, new);
- cds_lfht_for_each_entry(old, &iter, l, hnode) {
- cds_lfht_del(old, &l->hnode);
- call_rcu(&l->rcu_head, free_src_loc);
- }
- rcu_read_unlock();
-
- synchronize_rcu(); /* ensure totals points to new */
- cds_lfht_destroy(old, NULL);
- return 0;
- }
-
- /*
- * call-seq:
- *
- * Mwrap.clear -> nil
- *
- * Atomically replaces the totals table and destroys the old one.
- * This resets all statistics. It is more expensive than `Mwrap.reset'
- * as new allocations will need to be made to repopulate the new table.
- */
- static VALUE mwrap_clear(VALUE mod)
- {
- rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
- return Qnil;
- }
-
  static void *totals_reset(void *ign)
  {
  struct cds_lfht *t;
@@ -411,7 +720,10 @@ static void *totals_reset(void *ign)
  t = rcu_dereference(totals);
  cds_lfht_for_each_entry(t, &iter, l, hnode) {
  uatomic_set(&l->total, 0);
- uatomic_set(&l->calls, 0);
+ uatomic_set(&l->allocations, 0);
+ uatomic_set(&l->frees, 0);
+ uatomic_set(&l->age_total, 0);
+ uatomic_set(&l->max_lifespan, 0);
  }
  rcu_read_unlock();
  return 0;
@@ -423,8 +735,8 @@ static void *totals_reset(void *ign)
  * Mwrap.reset -> nil
  *
  * Resets the total tables by zero-ing all counters.
- * This resets all statistics and is less costly than `Mwrap.clear'
- * but is not an atomic operation.
+ * This resets all statistics. This is not an atomic operation
+ * as other threads (outside of GVL) may increment counters.
  */
  static VALUE mwrap_reset(VALUE mod)
  {
@@ -432,13 +744,40 @@ static VALUE mwrap_reset(VALUE mod)
  return Qnil;
  }
 
- static VALUE dump_ensure(VALUE ignored)
+ /* :nodoc: */
+ static VALUE mwrap_clear(VALUE mod)
+ {
+ return mwrap_reset(mod);
+ }
+
+ static VALUE rcu_unlock_ensure(VALUE ignored)
  {
  rcu_read_unlock();
  --locating;
  return Qfalse;
  }
 
+ static VALUE location_string(struct src_loc *l)
+ {
+ VALUE ret, tmp;
+
+ if (loc_is_addr(l)) {
+ char **s = backtrace_symbols((void *)l->k, 1);
+ tmp = rb_str_new_cstr(s[0]);
+ free(s);
+ }
+ else {
+ tmp = rb_str_new(l->k, l->capa - 1);
+ }
+
+ /* deduplicate and try to free up some memory */
+ ret = rb_funcall(tmp, id_uminus, 0);
+ if (!OBJ_FROZEN_RAW(tmp))
+ rb_str_resize(tmp, 0);
+
+ return ret;
+ }
+
  static VALUE dump_each_rcu(VALUE x)
  {
  struct dump_arg *a = (struct dump_arg *)x;
@@ -448,27 +787,17 @@ static VALUE dump_each_rcu(VALUE x)
 
  t = rcu_dereference(totals);
  cds_lfht_for_each_entry(t, &iter, l, hnode) {
- VALUE v[3];
+ VALUE v[6];
  if (l->total <= a->min) continue;
 
- if (loc_is_addr(l)) {
- char **s = backtrace_symbols((void *)l->k, 1);
- v[1] = rb_str_new_cstr(s[0]);
- free(s);
- }
- else {
- v[1] = rb_str_new(l->k, l->capa - 1);
- }
-
- /* deduplicate and try to free up some memory */
- v[0] = rb_funcall(v[1], id_uminus, 0);
- if (!OBJ_FROZEN_RAW(v[1]))
- rb_str_resize(v[1], 0);
-
+ v[0] = location_string(l);
  v[1] = SIZET2NUM(l->total);
- v[2] = SIZET2NUM(l->calls);
+ v[2] = SIZET2NUM(l->allocations);
+ v[3] = SIZET2NUM(l->frees);
+ v[4] = SIZET2NUM(l->age_total);
+ v[5] = SIZET2NUM(l->max_lifespan);
 
- rb_yield_values2(3, v);
+ rb_yield_values2(6, v);
  assert(rcu_read_ongoing());
  }
  return Qnil;
@@ -477,10 +806,12 @@ static VALUE dump_each_rcu(VALUE x)
  /*
  * call-seq:
  *
- * Mwrap.each([min]) { |location,total_bytes,call_count| ... }
+ * Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ * ...
+ * end
  *
  * Yields each entry of the table to a caller-supplied block.
- * +min+ may be specified to filter out lines with +total_bytes+
+ * +min+ may be specified to filter out lines with +total+ bytes
  * equal-to-or-smaller-than the supplied minimum.
  */
  static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
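A hedged usage sketch of the expanded block signature documented above; the 4096-byte filter, row limit, and output format are arbitrary choices rather than anything mandated by the gem:

```ruby
require 'mwrap'

# List the call sites with the largest total byte counts, skipping
# anything whose total is at or below 4096 bytes.
rows = []
Mwrap.each(4096) do |location, total, allocations, frees, age_total, max_lifespan|
  rows << [total, allocations, frees, location]
end

rows.sort.reverse.first(10).each do |total, allocations, frees, location|
  printf("%16u %12u %12u %s\n", total, allocations, frees, location)
end
```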
@@ -494,7 +825,212 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
  ++locating;
  rcu_read_lock();
 
- return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+ return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
+ }
+
+ static size_t
+ src_loc_memsize(const void *p)
+ {
+ return sizeof(struct src_loc);
+ }
+
+ static const rb_data_type_t src_loc_type = {
+ "source_location",
+ /* no marking, no freeing */
+ { 0, 0, src_loc_memsize, /* reserved */ },
+ /* parent, data, [ flags ] */
+ };
+
+ static VALUE cSrcLoc;
+
+ static int
+ extract_addr(const char *str, size_t len, void **p)
+ {
+ const char *c;
+ #if defined(__GLIBC__)
+ return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
+ #else /* tested FreeBSD */
+ return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+ #endif
+ }
+
+ /*
+ * call-seq:
+ * Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Returns the associated Mwrap::SourceLocation given the +location+
+ * String. +location+ is either a Ruby source location path:line
+ * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
+ * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
+ */
+ static VALUE mwrap_aref(VALUE mod, VALUE loc)
+ {
+ const char *str = StringValueCStr(loc);
+ int len = RSTRING_LENINT(loc);
+ struct src_loc *k = 0;
+ uintptr_t p;
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *cur;
+ struct cds_lfht *t;
+ struct src_loc *l;
+ VALUE val = Qnil;
+
+ if (extract_addr(str, len, (void **)&p)) {
+ k = (void *)kbuf;
+ memcpy(k->k, &p, sizeof(p));
+ k->capa = 0;
+ k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+ } else {
+ k = (void *)kbuf;
+ memcpy(k->k, str, len + 1);
+ k->capa = len + 1;
+ k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+ }
+
+ if (!k) return val;
+
+ rcu_read_lock();
+ t = rcu_dereference(totals);
+ if (!t) goto out_unlock;
+
+ cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+ cur = cds_lfht_iter_get_node(&iter);
+ if (cur) {
+ l = caa_container_of(cur, struct src_loc, hnode);
+ val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
+ }
+ out_unlock:
+ rcu_read_unlock();
+ return val;
+ }
+
+ static VALUE src_loc_each_i(VALUE p)
+ {
+ struct alloc_hdr *h;
+ struct src_loc *l = (struct src_loc *)p;
+
+ cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
+ size_t gen = uatomic_read(&h->as.live.gen);
+ size_t size = uatomic_read(&h->size);
+
+ if (size) {
+ VALUE v[2];
+ v[0] = SIZET2NUM(size);
+ v[1] = SIZET2NUM(gen);
+
+ rb_yield_values2(2, v);
+ }
+ }
+
+ return Qfalse;
+ }
+
+ static struct src_loc *src_loc_get(VALUE self)
+ {
+ struct src_loc *l;
+ TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+ assert(l);
+ return l;
+ }
+
+ /*
+ * call-seq:
+ * loc = Mwrap[location]
+ * loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.
+ *
+ * This functionality is only available in mwrap 2.0.0+
+ */
+ static VALUE src_loc_each(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+
+ assert(locating == 0 && "forgot to clear locating");
+ ++locating;
+ rcu_read_lock();
+ rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+ return self;
+ }
+
+ /*
+ * The mean lifespan (in GC generations) of allocations made from this
+ * location. This does not account for live allocations.
+ */
+ static VALUE src_loc_mean_lifespan(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+ size_t tot, frees;
+
+ frees = uatomic_read(&l->frees);
+ tot = uatomic_read(&l->age_total);
+ return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
+ }
+
+ /* The number of frees made from this location */
+ static VALUE src_loc_frees(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+ }
+
+ /* The number of allocations made from this location */
+ static VALUE src_loc_allocations(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+ }
+
+ /* The total number of bytes allocated from this location */
+ static VALUE src_loc_total(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+ }
+
+ /*
+ * The maximum age (in GC generations) of an allocation before it was freed.
+ * This does not account for live allocations.
+ */
+ static VALUE src_loc_max_lifespan(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+ }
+
+ /*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+ static VALUE src_loc_name(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+ VALUE ret;
+
+ ++locating;
+ ret = location_string(l);
+ --locating;
+ return ret;
+ }
+
+ static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
+
+ /*
+ * call-seq:
+ *
+ * Mwrap.quiet do |depth|
+ * # expensive sort/calculate/emitting results of Mwrap.each
+ * # affecting statistics of the rest of the app
+ * end
+ *
+ * Stops allocation tracking inside the block. This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.
+ *
+ * This feature was added in mwrap 2.0.0+
+ */
+ static VALUE mwrap_quiet(VALUE mod)
+ {
+ size_t cur = ++locating;
+ return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
  }
 
  /*
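Putting the new Mwrap singleton methods and Mwrap::SourceLocation accessors from the hunk above together, here is a hedged end-to-end sketch; the location string is a placeholder (real ones come from Mwrap.each or the dump output), and the report built inside Mwrap.quiet is purely illustrative:

```ruby
require 'mwrap'

loc = Mwrap['/path/to/app.rb:42']   # hypothetical location string
if loc
  puts loc.name            # frozen String form of the location
  puts loc.total           # total bytes allocated there
  puts loc.allocations     # allocation count
  puts loc.frees           # free count
  puts loc.mean_lifespan   # mean GC generations before free (Infinity if no frees)
  puts loc.max_lifespan    # oldest freed allocation, in GC generations

  # Live allocations: size in bytes and GC.count at allocation time
  loc.each { |size, generation| puts "#{size} bytes, gen #{generation}" }
end

# Keep the reporting itself out of the statistics:
Mwrap.quiet do
  report = []
  Mwrap.each(4096) { |location, total, *| report << [total, location] }
  report.sort!.reverse!
end
```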
@@ -515,19 +1051,47 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
  * * dump_fd: a writable FD to dump to
  * * dump_path: a path to dump to, the file is opened in O_APPEND mode
  * * dump_min: the minimum allocation size (total) to dump
+ * * memalign: use `1' to enable tracking the memalign family
  *
  * If both `dump_fd' and `dump_path' are specified, dump_path takes
  * precedence.
+ *
+ * Tracking the memalign family of functions is misleading for Ruby
+ * applications, as heap page allocations can happen anywhere a
+ * Ruby object is allocated, even in the coldest code paths.
+ * Furthermore, it is rarely used outside of the Ruby object allocator.
+ * Thus tracking memalign functions is disabled by default.
  */
  void Init_mwrap(void)
  {
- VALUE mod = rb_define_module("Mwrap");
+ VALUE mod;
+
+ ++locating;
+ mod = rb_define_module("Mwrap");
  id_uminus = rb_intern("-@");
 
+ /*
+ * Represents a location in source code or library
+ * address which calls a memory allocation. It is
+ * updated automatically as allocations are made, so
+ * there is no need to reload or reread it from Mwrap#[].
+ * This class is only available since mwrap 2.0.0+.
+ */
+ cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
  rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
- rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
  rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+ rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
  rb_define_singleton_method(mod, "each", mwrap_each, -1);
+ rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+ rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
+ rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+ rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+ rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+ rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+ rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+ rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+ rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+ --locating;
  }
 
  /* rb_cloexec_open isn't usable by non-Ruby processes */
@@ -538,18 +1102,18 @@ void Init_mwrap(void)
  __attribute__ ((destructor))
  static void mwrap_dump_destructor(void)
  {
- const char *opt = getenv("MWRAP");
- const char *modes[] = { "a", "a+", "w", "w+", "r+" };
- struct dump_arg a;
- size_t i;
- int dump_fd;
+ const char *opt = getenv("MWRAP");
+ const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+ struct dump_arg a;
+ size_t i;
+ int dump_fd;
  char *dump_path;
 
  if (!opt)
  return;
 
- ++locating;
- if ((dump_path = strstr(opt, "dump_path:")) &&
+ ++locating;
+ if ((dump_path = strstr(opt, "dump_path:")) &&
  (dump_path += sizeof("dump_path")) &&
  *dump_path) {
  char *end = strchr(dump_path, ',');
@@ -594,5 +1158,5 @@
  }
  dump_to_file(&a);
  out:
- --locating;
+ --locating;
  }
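Tying the MWRAP options documented in the Init_mwrap comment back to a launch command, one hedged way to drive them from Ruby is shown below; the paths, the 4096-byte threshold, and the comma-separated combination of options are assumptions for illustration (the dump_path parser shown earlier stops at a comma, which suggests this form), not an authoritative invocation:

```ruby
# Placeholder values throughout; option names follow the Init_mwrap docs above.
env = { 'MWRAP' => 'dump_path:/tmp/mwrap.log,dump_min:4096,memalign:1' }

# The `mwrap' executable sets up LD_PRELOAD before exec'ing the real command.
system(env, 'mwrap', 'ruby', '-e', 'puts :ok')
```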