mwrap 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: '049853543dbde8f93776318db059ac2d40ee82a241d080a8236a7d43efade454'
- data.tar.gz: a29b74fef93affeb9ffa30276fe9f75f9f5952cbfae142a749ba1b9409153789
+ metadata.gz: 8559afec1946b7f545c944085aac5e205601deb82fa9f1529785dd3ef7526e5a
+ data.tar.gz: 34ea90410103ec59f367baa00e1e4df6424fe67af8608b5c7a3a0cb66fec2440
  SHA512:
- metadata.gz: 52bad7de38eb6b256b1cecbb9a7a83da094602adf4162730fed0d58ff3fa3f605fcef77e724b3a3e8557bbbce597a9369d3673f2be2d176ddc841dc2cc3c2208
- data.tar.gz: 2b446a6a8ceef8cc5b7a34211fc2d7cd07e50692bcca6fe4a13d115b359b8fde7042ae1a6f7c53829f2b65fb9fd4158492e1dd142633a30c672eccb6120a1d71
+ metadata.gz: ff91f6f250e7a18cb465a6dc04ddee374411a10a92cd1b3227e09a4ccd0be1b8597f475f71d5061bd5bc36e8c0418ce3bf6faa3a78b37aec5d21b0340f736031
+ data.tar.gz: 06311f2949ae4dae6bd2d0e95e6b0d147b44b6bae6cd4464e4c1d160fc1f5f3f99b2a6a3a3e711a10b8894da3bdb6220ccfeb72dfb18545170edbbc65dbad225
@@ -0,0 +1,2 @@
+ ext/mwrap/mwrap.c
+ lib/mwrap_rack.rb
data/MANIFEST CHANGED
@@ -8,5 +8,6 @@ bin/mwrap
  ext/mwrap/extconf.rb
  ext/mwrap/jhash.h
  ext/mwrap/mwrap.c
+ lib/mwrap_rack.rb
  mwrap.gemspec
  test/test_mwrap.rb
data/README CHANGED
@@ -4,20 +4,23 @@ mwrap is designed to answer the question:
 
  Which lines of Ruby are hitting malloc the most?
 
- mwrap wraps all malloc, calloc, and realloc calls to trace the Ruby
- source location of such calls and bytes allocated at each callsite.
- This functionality may be expanded in the future.
-
- It does not track allocation lifetimes, or frees, however. It works
- best for allocations under GVL, but tries to track numeric caller
- addresses for allocations made without GVL so you can get an idea of how
- much memory usage certain extensions and native libraries use.
+ mwrap wraps all malloc-family calls to trace the Ruby source
+ location of such calls and bytes allocated at each callsite.
+ As of mwrap 2.0.0, it can also function as a leak detector
+ and show live allocations at every call site. Depending on
+ your application and workload, the overhead is roughly a 50%
+ increase in memory and runtime.
+
+ It works best for allocations under GVL, but tries to track
+ numeric caller addresses for allocations made without GVL so you
+ can get an idea of how much memory usage certain extensions and
+ native libraries use.
 
  It requires the concurrent lock-free hash table from the
  Userspace RCU project: https://liburcu.org/
 
- It does not require recompiling or rebuilding Ruby, but only supports
- Ruby trunk (2.6.0dev+) on a few platforms:
+ It does not require recompiling or rebuilding Ruby, but only
+ supports Ruby trunk (2.6.0dev+) on a few platforms:
 
  * GNU/Linux
  * FreeBSD (tested 11.1)
@@ -36,28 +39,35 @@ It may work on NetBSD, OpenBSD and DragonFly BSD.
  == Usage
 
  mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to
- improve ease-of-use. You can set dump_fd: in the MWRAP environment
- variable to dump the results to a certain file descriptor at exit:
-
- # Dump results to stderr at exit:
- MWRAP=dump_fd:2 mwrap RUBY_COMMAND
-
- You may also set dump_path to append to a log file:
+ improve ease-of-use. You can set dump_path: in the MWRAP environment
+ variable to append the results to a log file:
 
  MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND
 
- You may also `require 'mwrap'' in your Ruby code and use
- Mwrap.dump, Mwrap.clear, Mwrap.reset, Mwrap.each, etc.
+ # And to display the locations with the most allocations:
+ sort -k1,1rn </path/to/log | $PAGER
+
+ You may also `require "mwrap"' in your Ruby code and use
+ Mwrap.dump, Mwrap.reset, Mwrap.each, etc.
 
  However, mwrap MUST be loaded via LD_PRELOAD to have any
- effect in tracking malloc use.
+ effect in tracking malloc use. However, it is safe to keep
+ "require 'mwrap'" in performance-critical deployments,
+ as overhead is only incurred when used as an LD_PRELOAD.
 
  The output of the mwrap dump is a text file with 3 columns:
 
  total_bytes call_count location
 
  Where location is a Ruby source location (if made under GVL)
- or an address retrieved by backtrace_symbols(3)
+ or an address retrieved by backtrace_symbols(3). It is
+ recommended to use the sort(1) command on either of the
+ first two columns to find the hottest malloc locations.
+
+ mwrap 2.0.0+ also supports a Rack application endpoint,
+ it is documented at:
+
+ https://80x24.org/mwrap/MwrapRack.html
 
  == Known problems
 
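To make the README's `require "mwrap"' path concrete, here is a minimal sketch of driving the dump from Ruby instead of waiting for the at-exit dump. It assumes the process was started under the mwrap LD_PRELOAD wrapper and that Mwrap.dump accepts an optional IO and minimum-bytes argument; the log path is a placeholder:

    require 'mwrap'

    File.open('/tmp/mwrap.log', 'a') do |io|
      Mwrap.dump(io, 10_000)  # only entries with more than 10,000 bytes allocated
    end
    Mwrap.reset               # zero all counters before the next measurement window
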
data/bin/mwrap CHANGED
@@ -26,4 +26,11 @@ if ENV['MWRAP'] =~ /dump_fd:(\d+)/
  opts[dump_fd] = dump_io
  end
  end
+
+ # allow inheriting FDs from systemd
+ n = ENV['LISTEN_FDS']
+ if n && ENV['LISTEN_PID'].to_i == $$
+ n = 3 + n.to_i
+ (3...n).each { |fd| opts[fd] = IO.new(fd) }
+ end
  exec *ARGV, opts
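
The LISTEN_FDS/LISTEN_PID handling added above follows the systemd FD-passing convention: inherited descriptors start at FD 3 and LISTEN_PID must equal the current PID. Keeping IO objects for those descriptors in `opts' prevents exec from closing them, so a socket-activated server still receives its sockets through the wrapper. A hedged unit-file fragment showing the intended shape (the server binary and paths are placeholders, not from this gem):

    # mwrap-app.service (fragment)
    [Service]
    Environment=MWRAP=dump_path:/var/log/mwrap.log
    ExecStart=/usr/local/bin/mwrap /usr/local/bin/unicorn -c /etc/unicorn.rb
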
data/ext/mwrap/extconf.rb CHANGED
@@ -10,4 +10,19 @@ have_library 'urcu-bp' or abort 'liburcu-bp not found'
  have_library 'dl'
  have_library 'c'
  have_library 'execinfo' # FreeBSD
+
+ if try_link(<<'')
+ int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
+
+ $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+ end
+
+ if try_link(<<'')
+ int main(int a) { return __builtin_add_overflow(0,0,&a); }
+
+ $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+ else
+ abort 'missing __builtin_add_overflow'
+ end
+
  create_makefile 'mwrap'
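
For context, the two try_link probes above test the GCC/Clang checked-arithmetic builtins that mwrap.c relies on for overflow-safe size math. A standalone sketch of what those builtins do (illustration only, not part of the gem):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t out;

        /* __builtin_add_overflow stores the wrapped sum and returns nonzero on overflow */
        if (__builtin_add_overflow((size_t)-1, 16, &out))
            printf("overflowed, wrapped sum: %zu\n", out);

        /* __builtin_add_overflow_p (GCC 7+) only reports, without storing a result */
    #if defined(__GNUC__) && __GNUC__ >= 7 && !defined(__clang__)
        printf("would overflow: %d\n", __builtin_add_overflow_p((size_t)-1, 16, (size_t)0));
    #endif
        return 0;
    }
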
data/ext/mwrap/mwrap.c CHANGED
@@ -16,26 +16,40 @@
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
+ #include <pthread.h>
  #include <urcu-bp.h>
  #include <urcu/rculfhash.h>
+ #include <urcu/rculist.h>
  #include "jhash.h"
 
  static ID id_uminus;
+ static unsigned int track_memalign;
  const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
- static int *(*has_gvl_p)(void);
+ extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
+ extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
+ extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
+ extern size_t __attribute__((weak)) rb_gc_count(void);
+ extern VALUE __attribute__((weak)) rb_cObject;
+ extern VALUE __attribute__((weak)) rb_yield(VALUE);
+
+ /* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+ #define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
+
+ int __attribute__((weak)) ruby_thread_has_gvl_p(void)
+ {
+ return 0;
+ }
+
  #ifdef __FreeBSD__
  void *__malloc(size_t);
- void *__calloc(size_t, size_t);
- void *__realloc(void *, size_t);
- static void *(*real_malloc)(size_t) = __malloc;
- static void *(*real_calloc)(size_t, size_t) = __calloc;
- static void *(*real_realloc)(void *, size_t) = __realloc;
- # define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+ void __free(void *);
+ # define real_malloc __malloc
+ # define real_free __free
  #else
- static int ready;
  static void *(*real_malloc)(size_t);
- static void *(*real_calloc)(size_t, size_t);
- static void *(*real_realloc)(void *, size_t);
+ static void (*real_free)(void *);
+ static int resolving_malloc;
+ #endif /* !FreeBSD */
 
  /*
  * we need to fake an OOM condition while dlsym is running,
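
The weak declarations earlier in this hunk are what let a single mwrap.so be LD_PRELOAD-ed into both Ruby and non-Ruby processes: when libruby is absent, the weak fallback definition of ruby_thread_has_gvl_p above returns 0, so the Ruby-only globals are never read. A standalone sketch of that pattern (hypothetical symbol name, not mwrap's own):

    /* weak.c: a library adapts to whether the host process provides a symbol */
    #include <stdio.h>

    /* a strong definition in the host program overrides this weak fallback */
    int __attribute__((weak)) host_feature_enabled(void)
    {
        return 0;
    }

    int main(void)
    {
        /* prints 0 when built standalone; a host defining the symbol wins */
        printf("feature enabled: %d\n", host_feature_enabled());
        return 0;
    }
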
@@ -43,22 +57,33 @@ static void *(*real_realloc)(void *, size_t);
  * symbol for the jemalloc calloc, yet
  */
  # define RETURN_IF_NOT_READY() do { \
- if (!ready) { \
+ if (!real_malloc) { \
  errno = ENOMEM; \
  return NULL; \
  } \
  } while (0)
 
- #endif /* !FreeBSD */
+ static __thread size_t locating;
+ static size_t generation;
+ static size_t page_size;
+ static struct cds_lfht *totals;
+ union padded_mutex {
+ pthread_mutex_t mtx;
+ char pad[64];
+ };
 
- /*
- * rb_source_location_cstr relies on GET_EC(), and it's possible
- * to have a native thread but no EC during the early and late
- * (teardown) phases of the Ruby process
- */
- static void **ec_loc;
+ /* a round-robin pool of mutexes */
+ #define MUTEX_NR (1 << 6)
+ #define MUTEX_MASK (MUTEX_NR - 1)
+ static size_t mutex_i;
+ static union padded_mutex mutexes[MUTEX_NR] = {
+ [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+ };
 
- static struct cds_lfht *totals;
+ static pthread_mutex_t *mutex_assign(void)
+ {
+ return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+ }
 
  static struct cds_lfht *
  lfht_new(void)
@@ -69,19 +94,40 @@ lfht_new(void)
  __attribute__((constructor)) static void resolve_malloc(void)
  {
  int err;
+ const char *opt;
+ ++locating;
 
- #ifndef __FreeBSD__
- real_malloc = dlsym(RTLD_NEXT, "malloc");
- real_calloc = dlsym(RTLD_NEXT, "calloc");
- real_realloc = dlsym(RTLD_NEXT, "realloc");
- if (!real_calloc || !real_malloc || !real_realloc) {
- fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
- real_calloc, real_malloc, real_realloc);
+ #ifdef __FreeBSD__
+ /*
+ * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+ * which happens at pthread_mutex_lock, and that calls calloc
+ */
+ {
+ size_t i;
+
+ for (i = 0; i < MUTEX_NR; i++) {
+ err = pthread_mutex_init(&mutexes[i].mtx, 0);
+ if (err) {
+ fprintf(stderr, "error: %s\n", strerror(err));
+ _exit(1);
+ }
+ }
+ /* initialize mutexes used by urcu-bp */
+ rcu_read_lock();
+ rcu_read_unlock();
+ }
+ #else /* !FreeBSD (tested on GNU/Linux) */
+ if (!real_malloc) {
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+ real_free = dlsym(RTLD_NEXT, "free");
+ if (!real_malloc || !real_free) {
+ fprintf(stderr, "missing malloc/aligned_alloc/free\n"
+ "\t%p %p\n", real_malloc, real_free);
  _exit(1);
  }
- ready = 1;
- #endif
-
+ #endif /* !FreeBSD */
  totals = lfht_new();
  if (!totals)
  fprintf(stderr, "failed to allocate totals table\n");
@@ -91,14 +137,27 @@ __attribute__((constructor)) static void resolve_malloc(void)
  call_rcu_after_fork_child);
  if (err)
  fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+ page_size = sysconf(_SC_PAGESIZE);
+ opt = getenv("MWRAP");
+ if (opt && (opt = strstr(opt, "memalign:"))) {
+ if (!sscanf(opt, "memalign:%u", &track_memalign))
+ fprintf(stderr, "not an unsigned int: %s\n", opt);
+ }
+ --locating;
+ }
 
- has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p");
+ static void
+ mutex_lock(pthread_mutex_t *m)
+ {
+ int err = pthread_mutex_lock(m);
+ assert(err == 0);
+ }
 
- /*
- * resolve dynamically so it doesn't break when LD_PRELOAD-ed
- * into non-Ruby binaries
- */
- ec_loc = dlsym(RTLD_DEFAULT, "ruby_current_execution_context_ptr");
+ static void
+ mutex_unlock(pthread_mutex_t *m)
+ {
+ int err = pthread_mutex_unlock(m);
+ assert(err == 0);
  }
 
  #ifndef HAVE_MEMPCPY
@@ -114,8 +173,6 @@ my_mempcpy(void *dest, const void *src, size_t n)
  #define RETURN_ADDRESS(nr) \
  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
- static __thread size_t locating;
-
  #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
  static char *int2str(int num, char *dst, size_t * size)
  {
@@ -143,21 +200,58 @@ static char *int2str(int num, char *dst, size_t * size)
  return NULL;
  }
 
+ /*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
  static int has_ec_p(void)
  {
- return (ec_loc && *ec_loc);
+ return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
+ ruby_current_execution_context_ptr);
  }
 
+ /* allocated via real_malloc/real_free */
  struct src_loc {
- struct rcu_head rcu_head;
- size_t calls;
+ pthread_mutex_t *mtx;
  size_t total;
+ size_t allocations;
+ size_t frees;
+ size_t age_total; /* (age_total / frees) => mean age at free */
+ size_t max_lifespan;
  struct cds_lfht_node hnode;
+ struct cds_list_head allocs; /* <=> alloc_hdr.node */
  uint32_t hval;
  uint32_t capa;
  char k[];
  };
 
+ /* every allocation has this in the header, maintain alignment with malloc */
+ struct alloc_hdr {
+ struct cds_list_head anode; /* <=> src_loc.allocs */
+ union {
+ struct {
+ size_t gen; /* rb_gc_count() */
+ struct src_loc *loc;
+ } live;
+ struct rcu_head dead;
+ } as;
+ void *real; /* what to call real_free on */
+ size_t size;
+ };
+
+ static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
+
+ static struct alloc_hdr *ptr2hdr(void *p)
+ {
+ return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+ }
+
+ static void *hdr2ptr(struct alloc_hdr *h)
+ {
+ return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+ }
+
  static int loc_is_addr(const struct src_loc *l)
  {
  return l->capa == 0;
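
The alloc_hdr/ptr2hdr/hdr2ptr trio added above is the classic header-before-pointer trick: every wrapped allocation reserves room for a hidden header, the caller gets the address just past it, and free() recovers the header by subtracting its size. A simplified standalone sketch of that layout (illustration only, not mwrap's actual header):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    struct hdr { size_t size; }; /* simplified stand-in for struct alloc_hdr */

    static void *hdr2ptr(struct hdr *h)
    {
        return (void *)((uintptr_t)h + sizeof(struct hdr));
    }

    static struct hdr *ptr2hdr(void *p)
    {
        return (struct hdr *)((uintptr_t)p - sizeof(struct hdr));
    }

    int main(void)
    {
        /* over-allocate so the header sits immediately before the user pointer */
        struct hdr *h = malloc(sizeof(struct hdr) + 100);
        void *user;

        if (!h) return 1;
        user = hdr2ptr(h);
        h->size = 100;
        printf("recovered size: %zu\n", ptr2hdr(user)->size); /* prints 100 */
        free(h);
        return 0;
    }
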
@@ -180,16 +274,14 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
  memcmp(k->k, existing->k, loc_size(k)) == 0);
  }
 
- static void totals_add(struct src_loc *k)
+ static struct src_loc *totals_add_rcu(struct src_loc *k)
  {
  struct cds_lfht_iter iter;
  struct cds_lfht_node *cur;
- struct src_loc *l;
+ struct src_loc *l = 0;
  struct cds_lfht *t;
 
-
  again:
- rcu_read_lock();
  t = rcu_dereference(totals);
  if (!t) goto out_unlock;
  cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
@@ -197,44 +289,58 @@ again:
  if (cur) {
  l = caa_container_of(cur, struct src_loc, hnode);
  uatomic_add(&l->total, k->total);
- uatomic_add(&l->calls, 1);
+ uatomic_add(&l->allocations, 1);
  } else {
  size_t n = loc_size(k);
- l = malloc(sizeof(*l) + n);
+ l = real_malloc(sizeof(*l) + n);
  if (!l) goto out_unlock;
-
  memcpy(l, k, sizeof(*l) + n);
- l->calls = 1;
+ l->mtx = mutex_assign();
+ l->age_total = 0;
+ l->max_lifespan = 0;
+ l->frees = 0;
+ l->allocations = 1;
+ CDS_INIT_LIST_HEAD(&l->allocs);
  cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
  if (cur != &l->hnode) { /* lost race */
  rcu_read_unlock();
- free(l);
+ real_free(l);
+ rcu_read_lock();
  goto again;
  }
  }
  out_unlock:
- rcu_read_unlock();
+ return l;
+ }
+
+ static void update_stats_rcu_unlock(const struct src_loc *l)
+ {
+ if (caa_likely(l)) rcu_read_unlock();
  }
 
- static void update_stats(size_t size, uintptr_t caller)
+ static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
  {
- struct src_loc *k;
+ struct src_loc *k, *ret = 0;
  static const size_t xlen = sizeof(caller);
  char *dst;
 
+ if (caa_unlikely(!totals)) return 0;
  if (locating++) goto out; /* do not recurse into another *alloc */
 
- if (has_gvl_p && has_gvl_p() && has_ec_p()) {
+ rcu_read_lock();
+ if (has_ec_p()) {
  int line;
  const char *ptr = rb_source_location_cstr(&line);
  size_t len;
  size_t int_size = INT2STR_MAX;
 
+ generation = rb_gc_count();
+
  if (!ptr) goto unknown;
 
  /* avoid vsnprintf or anything which could call malloc here: */
  len = strlen(ptr);
- k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+ k = (void *)kbuf;
  k->total = size;
  dst = mempcpy(k->k, ptr, len);
  *dst++ = ':';
@@ -243,7 +349,7 @@ static void update_stats(size_t size, uintptr_t caller)
  *dst = 0; /* terminate string */
  k->capa = (uint32_t)(dst - k->k + 1);
  k->hval = jhash(k->k, k->capa, 0xdeadbeef);
- totals_add(k);
+ ret = totals_add_rcu(k);
  } else {
  rb_bug("bad math making key from location %s:%d\n",
  ptr, line);
@@ -255,36 +361,281 @@ unknown:
  memcpy(k->k, &caller, xlen);
  k->capa = 0;
  k->hval = jhash(k->k, xlen, 0xdeadbeef);
- totals_add(k);
+ ret = totals_add_rcu(k);
  }
  out:
  --locating;
+ return ret;
+ }
+
+ size_t malloc_usable_size(void *p)
+ {
+ return ptr2hdr(p)->size;
+ }
+
+ static void
+ free_hdr_rcu(struct rcu_head *dead)
+ {
+ struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+ real_free(h->real);
+ }
+
+ void free(void *p)
+ {
+ if (p) {
+ struct alloc_hdr *h = ptr2hdr(p);
+ struct src_loc *l = h->as.live.loc;
+
+ if (!real_free) return; /* oh well, leak a little */
+ if (l) {
+ size_t age = generation - h->as.live.gen;
+
+ uatomic_set(&h->size, 0);
+ uatomic_add(&l->frees, 1);
+ uatomic_add(&l->age_total, age);
+
+ mutex_lock(l->mtx);
+ cds_list_del_rcu(&h->anode);
+ if (age > l->max_lifespan)
+ l->max_lifespan = age;
+ mutex_unlock(l->mtx);
+
+ call_rcu(&h->as.dead, free_hdr_rcu);
+ }
+ else {
+ real_free(h->real);
+ }
+ }
+ }
+
+ static void
+ alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+ {
+ /* we need src_loc to remain alive for the duration of this call */
+ if (!h) return;
+ h->size = size;
+ h->real = real;
+ h->as.live.loc = l;
+ h->as.live.gen = generation;
+ if (l) {
+ mutex_lock(l->mtx);
+ cds_list_add_rcu(&h->anode, &l->allocs);
+ mutex_unlock(l->mtx);
+ }
+ }
+
+ static size_t size_align(size_t size, size_t alignment)
+ {
+ return ((size + (alignment - 1)) & ~(alignment - 1));
+ }
+
+ static bool ptr_is_aligned(void *ptr, size_t alignment)
+ {
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+ }
+
+ static void *ptr_align(void *ptr, size_t alignment)
+ {
+ return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+ }
+
+ static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+ static int
+ internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+ {
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ void *real;
+ size_t asize;
+ size_t d = alignment / sizeof(void*);
+ size_t r = alignment % sizeof(void*);
+
+ if (!real_malloc) return ENOMEM;
+
+ if (r != 0 || d == 0 || !is_power_of_two(d))
+ return EINVAL;
+
+ if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+ void *p = malloc(size);
+ if (!p) return ENOMEM;
+ *pp = p;
+ return 0;
+ }
+ for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+ ; /* double alignment until >= sizeof(struct alloc_hdr) */
+ if (__builtin_add_overflow(size, alignment, &asize) ||
+ __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
+ return ENOMEM;
+
+ /* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
+ l = track_memalign ? update_stats_rcu_lock(size, caller) : 0;
+ real = real_malloc(asize);
+ if (real) {
+ void *p = hdr2ptr(real);
+ if (!ptr_is_aligned(p, alignment))
+ p = ptr_align(p, alignment);
+ h = ptr2hdr(p);
+ alloc_insert_rcu(l, h, size, real);
+ *pp = p;
+ }
+ update_stats_rcu_unlock(l);
+
+ return real ? 0 : ENOMEM;
+ }
+
+ static void *
+ memalign_result(int err, void *p)
+ {
+ if (caa_unlikely(err)) {
+ errno = err;
+ return 0;
+ }
+ return p;
+ }
+
+ void *memalign(size_t alignment, size_t size)
+ {
+ void *p;
+ int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+ }
+
+ int posix_memalign(void **p, size_t alignment, size_t size)
+ {
+ return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
+ }
+
+ void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+ void cfree(void *) __attribute__((alias("free")));
+
+ void *valloc(size_t size)
+ {
+ void *p;
+ int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+ }
+
+ #if __GNUC__ < 7
+ # define add_overflow_p(a,b) __extension__({ \
+ __typeof__(a) _c; \
+ __builtin_add_overflow(a,b,&_c); \
+ })
+ #else
+ # define add_overflow_p(a,b) \
+ __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+ #endif
+
+ void *pvalloc(size_t size)
+ {
+ size_t alignment = page_size;
+ void *p;
+ int err;
+
+ if (add_overflow_p(size, alignment)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ size = size_align(size, alignment);
+ err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
  }
 
- /*
- * Do we care for *memalign? ruby/gc.c uses it in ways this lib
- * doesn't care about, but maybe some gems use it, too.
- */
  void *malloc(size_t size)
  {
- RETURN_IF_NOT_READY();
- update_stats(size, RETURN_ADDRESS(0));
- return real_malloc(size);
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+ void *p;
+
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+ goto enomem;
+
+ /*
+ * Needed for C++ global declarations using "new",
+ * which happens before our constructor
+ */
+ #ifndef __FreeBSD__
+ if (!real_malloc) {
+ if (resolving_malloc) goto enomem;
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+ #endif
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (h) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
+ enomem:
+ errno = ENOMEM;
+ return 0;
  }
 
  void *calloc(size_t nmemb, size_t size)
  {
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (__builtin_mul_overflow(size, nmemb, &size)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
  RETURN_IF_NOT_READY();
- /* ruby_xcalloc already does overflow checking */
- update_stats(nmemb * size, RETURN_ADDRESS(0));
- return real_calloc(nmemb, size);
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ memset(p, 0, size);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
  }
 
  void *realloc(void *ptr, size_t size)
  {
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (!size) {
+ free(ptr);
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
  RETURN_IF_NOT_READY();
- update_stats(size, RETURN_ADDRESS(0));
- return real_realloc(ptr, size);
+
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+
+ if (ptr && p) {
+ struct alloc_hdr *old = ptr2hdr(ptr);
+ memcpy(p, ptr, old->size < size ? old->size : size);
+ free(ptr);
+ }
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
  }
 
  struct dump_arg {
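
The size_align/ptr_align helpers added in the hunk above are plain power-of-two bit tricks: round a size (or an address) up to the next multiple of the alignment. internal_memalign uses them after over-allocating size + alignment + sizeof(struct alloc_hdr) from real_malloc so an aligned user pointer always fits. A standalone sketch with concrete numbers (illustration only, not mwrap code):

    #include <stdio.h>
    #include <stdint.h>

    static size_t size_align(size_t size, size_t alignment)
    {
        return (size + (alignment - 1)) & ~(alignment - 1);
    }

    static uintptr_t addr_align(uintptr_t addr, size_t alignment)
    {
        return (addr + (alignment - 1)) & ~(uintptr_t)(alignment - 1);
    }

    int main(void)
    {
        /* rounding a size up to a 4096-byte page, as pvalloc does */
        printf("%zu\n", size_align(100, 4096)); /* 4096 */

        /* rounding an address up to 64 bytes, as internal_memalign does */
        printf("%#lx\n", (unsigned long)addr_align(0x1008, 64)); /* 0x1040 */
        return 0;
    }
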
@@ -314,7 +665,7 @@ static void *dump_to_file(void *x)
  p = s[0];
  }
  fprintf(a->fp, "%16zu %12zu %s\n",
- l->total, l->calls, (const char *)p);
+ l->total, l->allocations, (const char *)p);
  if (s) free(s);
  }
  out_unlock:
@@ -349,6 +700,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
  io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
 
  a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+ io = rb_io_get_io(io);
  io = rb_io_get_write_io(io);
  GetOpenFile(io, fptr);
  a.fp = rb_io_stdio_file(fptr);
@@ -358,49 +710,6 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
  return Qnil;
  }
 
- static void
- free_src_loc(struct rcu_head *head)
- {
- struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
- free(l);
- }
-
- static void *totals_clear(void *ign)
- {
- struct cds_lfht *new, *old;
- struct cds_lfht_iter iter;
- struct src_loc *l;
-
- new = lfht_new();
- rcu_read_lock();
- old = rcu_dereference(totals);
- rcu_assign_pointer(totals, new);
- cds_lfht_for_each_entry(old, &iter, l, hnode) {
- cds_lfht_del(old, &l->hnode);
- call_rcu(&l->rcu_head, free_src_loc);
- }
- rcu_read_unlock();
-
- synchronize_rcu(); /* ensure totals points to new */
- cds_lfht_destroy(old, NULL);
- return 0;
- }
-
- /*
- * call-seq:
- *
- * Mwrap.clear -> nil
- *
- * Atomically replaces the totals table and destroys the old one.
- * This resets all statistics. It is more expensive than `Mwrap.reset'
- * as new allocations will need to be made to repopulate the new table.
- */
- static VALUE mwrap_clear(VALUE mod)
- {
- rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
- return Qnil;
- }
-
  static void *totals_reset(void *ign)
  {
  struct cds_lfht *t;
@@ -411,7 +720,10 @@ static void *totals_reset(void *ign)
  t = rcu_dereference(totals);
  cds_lfht_for_each_entry(t, &iter, l, hnode) {
  uatomic_set(&l->total, 0);
- uatomic_set(&l->calls, 0);
+ uatomic_set(&l->allocations, 0);
+ uatomic_set(&l->frees, 0);
+ uatomic_set(&l->age_total, 0);
+ uatomic_set(&l->max_lifespan, 0);
  }
  rcu_read_unlock();
  return 0;
@@ -423,8 +735,8 @@ static void *totals_reset(void *ign)
  * Mwrap.reset -> nil
  *
  * Resets the the total tables by zero-ing all counters.
- * This resets all statistics and is less costly than `Mwrap.clear'
- * but is not an atomic operation.
+ * This resets all statistics. This is not an atomic operation
+ * as other threads (outside of GVL) may increment counters.
  */
  static VALUE mwrap_reset(VALUE mod)
  {
@@ -432,13 +744,40 @@ static VALUE mwrap_reset(VALUE mod)
  return Qnil;
  }
 
- static VALUE dump_ensure(VALUE ignored)
+ /* :nodoc: */
+ static VALUE mwrap_clear(VALUE mod)
+ {
+ return mwrap_reset(mod);
+ }
+
+ static VALUE rcu_unlock_ensure(VALUE ignored)
  {
  rcu_read_unlock();
  --locating;
  return Qfalse;
  }
 
+ static VALUE location_string(struct src_loc *l)
+ {
+ VALUE ret, tmp;
+
+ if (loc_is_addr(l)) {
+ char **s = backtrace_symbols((void *)l->k, 1);
+ tmp = rb_str_new_cstr(s[0]);
+ free(s);
+ }
+ else {
+ tmp = rb_str_new(l->k, l->capa - 1);
+ }
+
+ /* deduplicate and try to free up some memory */
+ ret = rb_funcall(tmp, id_uminus, 0);
+ if (!OBJ_FROZEN_RAW(tmp))
+ rb_str_resize(tmp, 0);
+
+ return ret;
+ }
+
  static VALUE dump_each_rcu(VALUE x)
  {
  struct dump_arg *a = (struct dump_arg *)x;
@@ -448,27 +787,17 @@ static VALUE dump_each_rcu(VALUE x)
 
  t = rcu_dereference(totals);
  cds_lfht_for_each_entry(t, &iter, l, hnode) {
- VALUE v[3];
+ VALUE v[6];
  if (l->total <= a->min) continue;
 
- if (loc_is_addr(l)) {
- char **s = backtrace_symbols((void *)l->k, 1);
- v[1] = rb_str_new_cstr(s[0]);
- free(s);
- }
- else {
- v[1] = rb_str_new(l->k, l->capa - 1);
- }
-
- /* deduplicate and try to free up some memory */
- v[0] = rb_funcall(v[1], id_uminus, 0);
- if (!OBJ_FROZEN_RAW(v[1]))
- rb_str_resize(v[1], 0);
-
+ v[0] = location_string(l);
  v[1] = SIZET2NUM(l->total);
- v[2] = SIZET2NUM(l->calls);
+ v[2] = SIZET2NUM(l->allocations);
+ v[3] = SIZET2NUM(l->frees);
+ v[4] = SIZET2NUM(l->age_total);
+ v[5] = SIZET2NUM(l->max_lifespan);
 
- rb_yield_values2(3, v);
+ rb_yield_values2(6, v);
  assert(rcu_read_ongoing());
  }
  return Qnil;
@@ -477,10 +806,12 @@ static VALUE dump_each_rcu(VALUE x)
  /*
  * call-seq:
  *
- * Mwrap.each([min]) { |location,total_bytes,call_count| ... }
+ * Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ * ...
+ * end
  *
  * Yields each entry of the of the table to a caller-supplied block.
- * +min+ may be specified to filter out lines with +total_bytes+
+ * +min+ may be specified to filter out lines with +total+ bytes
  * equal-to-or-smaller-than the supplied minimum.
  */
  static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
@@ -494,7 +825,212 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
  ++locating;
  rcu_read_lock();
 
- return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+ return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
+ }
+
+ static size_t
+ src_loc_memsize(const void *p)
+ {
+ return sizeof(struct src_loc);
+ }
+
+ static const rb_data_type_t src_loc_type = {
+ "source_location",
+ /* no marking, no freeing */
+ { 0, 0, src_loc_memsize, /* reserved */ },
+ /* parent, data, [ flags ] */
+ };
+
+ static VALUE cSrcLoc;
+
+ static int
+ extract_addr(const char *str, size_t len, void **p)
+ {
+ const char *c;
+ #if defined(__GLIBC__)
+ return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
+ #else /* tested FreeBSD */
+ return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+ #endif
+ }
+
+ /*
+ * call-seq:
+ * Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Returns the associated Mwrap::SourceLocation given the +location+
+ * String. +location+ is either a Ruby source location path:line
+ * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
+ * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
+ */
+ static VALUE mwrap_aref(VALUE mod, VALUE loc)
+ {
+ const char *str = StringValueCStr(loc);
+ int len = RSTRING_LENINT(loc);
+ struct src_loc *k = 0;
+ uintptr_t p;
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *cur;
+ struct cds_lfht *t;
+ struct src_loc *l;
+ VALUE val = Qnil;
+
+ if (extract_addr(str, len, (void **)&p)) {
+ k = (void *)kbuf;
+ memcpy(k->k, &p, sizeof(p));
+ k->capa = 0;
+ k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+ } else {
+ k = (void *)kbuf;
+ memcpy(k->k, str, len + 1);
+ k->capa = len + 1;
+ k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+ }
+
+ if (!k) return val;
+
+ rcu_read_lock();
+ t = rcu_dereference(totals);
+ if (!t) goto out_unlock;
+
+ cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+ cur = cds_lfht_iter_get_node(&iter);
+ if (cur) {
+ l = caa_container_of(cur, struct src_loc, hnode);
+ val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
+ }
+ out_unlock:
+ rcu_read_unlock();
+ return val;
+ }
+
+ static VALUE src_loc_each_i(VALUE p)
+ {
+ struct alloc_hdr *h;
+ struct src_loc *l = (struct src_loc *)p;
+
+ cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
+ size_t gen = uatomic_read(&h->as.live.gen);
+ size_t size = uatomic_read(&h->size);
+
+ if (size) {
+ VALUE v[2];
+ v[0] = SIZET2NUM(size);
+ v[1] = SIZET2NUM(gen);
+
+ rb_yield_values2(2, v);
+ }
+ }
+
+ return Qfalse;
+ }
+
+ static struct src_loc *src_loc_get(VALUE self)
+ {
+ struct src_loc *l;
+ TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+ assert(l);
+ return l;
+ }
+
+ /*
+ * call-seq:
+ * loc = Mwrap[location]
+ * loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.
+ *
+ * This functionality is only available in mwrap 2.0.0+
+ */
+ static VALUE src_loc_each(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+
+ assert(locating == 0 && "forgot to clear locating");
+ ++locating;
+ rcu_read_lock();
+ rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+ return self;
+ }
+
+ /*
+ * The the mean lifespan (in GC generations) of allocations made from this
+ * location. This does not account for live allocations.
+ */
+ static VALUE src_loc_mean_lifespan(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+ size_t tot, frees;
+
+ frees = uatomic_read(&l->frees);
+ tot = uatomic_read(&l->age_total);
+ return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
+ }
+
+ /* The number of frees made from this location */
+ static VALUE src_loc_frees(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+ }
+
+ /* The number of allocations made from this location */
+ static VALUE src_loc_allocations(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+ }
+
+ /* The total number of bytes allocated from this location */
+ static VALUE src_loc_total(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+ }
+
+ /*
+ * The maximum age (in GC generations) of an allocation before it was freed.
+ * This does not account for live allocations.
+ */
+ static VALUE src_loc_max_lifespan(VALUE self)
+ {
+ return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+ }
+
+ /*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+ static VALUE src_loc_name(VALUE self)
+ {
+ struct src_loc *l = src_loc_get(self);
+ VALUE ret;
+
+ ++locating;
+ ret = location_string(l);
+ --locating;
+ return ret;
+ }
+
+ static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
+
+ /*
+ * call-seq:
+ *
+ * Mwrap.quiet do |depth|
+ * # expensive sort/calculate/emitting results of Mwrap.each
+ * # affecting statistics of the rest of the app
+ * end
+ *
+ * Stops allocation tracking inside the block. This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.
+ *
+ * This feature was added in mwrap 2.0.0+
+ */
+ static VALUE mwrap_quiet(VALUE mod)
+ {
+ size_t cur = ++locating;
+ return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
  }
 
  /*
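
The Mwrap[], Mwrap::SourceLocation and Mwrap.quiet additions in the hunk above can be exercised from Ruby roughly as follows; a hedged sketch assuming mwrap.so is active via LD_PRELOAD, Mwrap.each yielding the six values documented above, and a placeholder location string:

    require 'mwrap'

    # enumerate call sites with more than 4096 bytes allocated in total
    Mwrap.each(4096) do |location, total, allocations, frees, age_total, max_lifespan|
      printf("%s: %d bytes over %d allocations (%d freed)\n",
             location, total, allocations, frees)
    end

    # drill into one location ("/path/to/app.rb:42" is a placeholder)
    if (loc = Mwrap["/path/to/app.rb:42"])
      puts loc.name
      puts "mean lifespan: #{loc.mean_lifespan} GC generations (max #{loc.max_lifespan})"
      loc.each { |size, generation| puts "  live: #{size} bytes since GC.count == #{generation}" }
    end

Wrapping the reporting work itself in Mwrap.quiet { ... } keeps the report's own allocations out of the statistics.
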
@@ -515,19 +1051,47 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
  * * dump_fd: a writable FD to dump to
  * * dump_path: a path to dump to, the file is opened in O_APPEND mode
  * * dump_min: the minimum allocation size (total) to dump
+ * * memalign: use `1' to enable tracking the memalign family
  *
  * If both `dump_fd' and `dump_path' are specified, dump_path takes
  * precedence.
+ *
+ * Tracking the memalign family of functions is misleading for Ruby
+ * applications, as heap page allocations can happen anywhere a
+ * Ruby object is allocated, even in the coldest code paths.
+ * Furthermore, it is rarely-used outside of the Ruby object allocator.
+ * Thus tracking memalign functions is disabled by default.
  */
  void Init_mwrap(void)
  {
- VALUE mod = rb_define_module("Mwrap");
+ VALUE mod;
+
+ ++locating;
+ mod = rb_define_module("Mwrap");
  id_uminus = rb_intern("-@");
 
+ /*
+ * Represents a location in source code or library
+ * address which calls a memory allocation. It is
+ * updated automatically as allocations are made, so
+ * there is no need to reload or reread it from Mwrap#[].
+ * This class is only available since mwrap 2.0.0+.
+ */
+ cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
  rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
- rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
  rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+ rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
  rb_define_singleton_method(mod, "each", mwrap_each, -1);
+ rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+ rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
+ rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+ rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+ rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+ rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+ rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+ rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+ rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+ --locating;
  }
 
  /* rb_cloexec_open isn't usable by non-Ruby processes */
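
The documentation block above lists every MWRAP option the library recognizes (dump_fd, dump_path, dump_min, and the new memalign flag); the destructor below parses them from a comma-separated MWRAP value. Hedged command-line examples using only those documented options (paths and scripts are placeholders):

    # append a dump of entries totalling more than 10000 bytes to a log at exit
    MWRAP=dump_path:/tmp/mwrap.log,dump_min:10000 mwrap ruby my_script.rb

    # dump to stderr instead, and also track the memalign family (off by default)
    MWRAP=dump_fd:2,memalign:1 mwrap ruby my_script.rb
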
@@ -538,18 +1102,18 @@ void Init_mwrap(void)
  __attribute__ ((destructor))
  static void mwrap_dump_destructor(void)
  {
- const char *opt = getenv("MWRAP");
- const char *modes[] = { "a", "a+", "w", "w+", "r+" };
- struct dump_arg a;
- size_t i;
- int dump_fd;
+ const char *opt = getenv("MWRAP");
+ const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+ struct dump_arg a;
+ size_t i;
+ int dump_fd;
  char *dump_path;
 
  if (!opt)
  return;
 
- ++locating;
- if ((dump_path = strstr(opt, "dump_path:")) &&
+ ++locating;
+ if ((dump_path = strstr(opt, "dump_path:")) &&
  (dump_path += sizeof("dump_path")) &&
  *dump_path) {
  char *end = strchr(dump_path, ',');
@@ -594,5 +1158,5 @@ static void mwrap_dump_destructor(void)
  }
  dump_to_file(&a);
  out:
- --locating;
+ --locating;
  }
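
Finally, the lib/mwrap_rack.rb file added in this release backs the Rack endpoint mentioned in the README (https://80x24.org/mwrap/MwrapRack.html). A heavily hedged config.ru sketch of how such an endpoint is commonly mounted; the exact MwrapRack API should be confirmed against that documentation:

    # config.ru (sketch; assumes MwrapRack is a Rack application class)
    require 'mwrap_rack'
    map('/MWRAP') { run(MwrapRack.new) }
    map('/') { run(YourApp) }  # YourApp is a placeholder for the real application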