mwrap 1.0.0 → 2.0.0
- checksums.yaml +4 -4
- data/.document +2 -0
- data/MANIFEST +1 -0
- data/README +31 -21
- data/bin/mwrap +7 -0
- data/ext/mwrap/extconf.rb +15 -0
- data/ext/mwrap/mwrap.c +710 -146
- data/mwrap.gemspec +1 -1
- data/test/test_mwrap.rb +126 -0
- metadata +4 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8559afec1946b7f545c944085aac5e205601deb82fa9f1529785dd3ef7526e5a
+  data.tar.gz: 34ea90410103ec59f367baa00e1e4df6424fe67af8608b5c7a3a0cb66fec2440
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ff91f6f250e7a18cb465a6dc04ddee374411a10a92cd1b3227e09a4ccd0be1b8597f475f71d5061bd5bc36e8c0418ce3bf6faa3a78b37aec5d21b0340f736031
+  data.tar.gz: 06311f2949ae4dae6bd2d0e95e6b0d147b44b6bae6cd4464e4c1d160fc1f5f3f99b2a6a3a3e711a10b8894da3bdb6220ccfeb72dfb18545170edbbc65dbad225
data/.document ADDED

data/MANIFEST CHANGED

data/README CHANGED
@@ -4,20 +4,23 @@ mwrap is designed to answer the question:
 
 Which lines of Ruby are hitting malloc the most?
 
-mwrap wraps all malloc
-
-
-
-
-
-
-
+mwrap wraps all malloc-family calls to trace the Ruby source
+location of such calls and bytes allocated at each callsite.
+As of mwrap 2.0.0, it can also function as a leak detector
+and show live allocations at every call site. Depending on
+your application and workload, the overhead is roughly a 50%
+increase in memory and runtime.
+
+It works best for allocations under GVL, but tries to track
+numeric caller addresses for allocations made without GVL so you
+can get an idea of how much memory usage certain extensions and
+native libraries use.
 
 It requires the concurrent lock-free hash table from the
 Userspace RCU project: https://liburcu.org/
 
-It does not require recompiling or rebuilding Ruby, but only
-Ruby trunk (2.6.0dev+) on a few platforms:
+It does not require recompiling or rebuilding Ruby, but only
+supports Ruby trunk (2.6.0dev+) on a few platforms:
 
 * GNU/Linux
 * FreeBSD (tested 11.1)
@@ -36,28 +39,35 @@ It may work on NetBSD, OpenBSD and DragonFly BSD.
 == Usage
 
 mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to
-improve ease-of-use. You can set
-variable to
-
-	# Dump results to stderr at exit:
-	MWRAP=dump_fd:2 mwrap RUBY_COMMAND
-
-You may also set dump_path to append to a log file:
+improve ease-of-use. You can set dump_path: in the MWRAP environment
+variable to append the results to a log file:
 
 	MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND
 
-
-
+	# And to display the locations with the most allocations:
+	sort -k1,1rn </path/to/log | $PAGER
+
+You may also `require "mwrap"' in your Ruby code and use
+Mwrap.dump, Mwrap.reset, Mwrap.each, etc.
 
 However, mwrap MUST be loaded via LD_PRELOAD to have any
-effect in tracking malloc use.
+effect in tracking malloc use. However, it is safe to keep
+"require 'mwrap'" in performance-critical deployments,
+as overhead is only incurred when used as an LD_PRELOAD.
 
 The output of the mwrap dump is a text file with 3 columns:
 
 	total_bytes	call_count	location
 
 Where location is a Ruby source location (if made under GVL)
-or an address retrieved by backtrace_symbols(3)
+or an address retrieved by backtrace_symbols(3). It is
+recommended to use the sort(1) command on either of the
+first two columns to find the hottest malloc locations.
+
+mwrap 2.0.0+ also supports a Rack application endpoint,
+it is documented at:
+
+	https://80x24.org/mwrap/MwrapRack.html
 
 == Known problems
 
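A minimal sketch of the workflow the README describes (the log path and workload are illustrative; the IO-then-minimum argument order for Mwrap.dump follows the call-seq documentation in mwrap.c below):

	# In the shell, run a Ruby command under mwrap, appending totals to a log:
	#
	#	MWRAP=dump_path:/tmp/mwrap.log mwrap ruby app.rb
	#
	# Inside the process, the same counters are reachable directly:
	require 'mwrap'

	Mwrap.reset                # zero all counters before the workload of interest
	100.times { 'x' * 8192 }   # stand-in for an allocation-heavy workload
	Mwrap.dump($stderr, 4096)  # dump call sites with more than 4096 total bytes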
data/bin/mwrap CHANGED
@@ -26,4 +26,11 @@ if ENV['MWRAP'] =~ /dump_fd:(\d+)/
 		opts[dump_fd] = dump_io
 	end
 end
+
+# allow inheriting FDs from systemd
+n = ENV['LISTEN_FDS']
+if n && ENV['LISTEN_PID'].to_i == $$
+	n = 3 + n.to_i
+	(3...n).each { |fd| opts[fd] = IO.new(fd) }
+end
 exec *ARGV, opts
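The new block relies on Kernel#exec's options hash: an Integer file-descriptor key mapped to an IO keeps that descriptor open in the spawned command instead of being closed on exec. A standalone sketch of the same systemd FD-inheritance idiom, with a placeholder command:

	# systemd sets LISTEN_FDS (a count of descriptors starting at 3) and
	# LISTEN_PID (the PID the descriptors were meant for).
	opts = {}
	if (n = ENV['LISTEN_FDS']) && ENV['LISTEN_PID'].to_i == Process.pid
	  (3...(3 + n.to_i)).each { |fd| opts[fd] = IO.new(fd) }
	end
	exec('placeholder-server', opts) # inherited sockets stay open in the child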
data/ext/mwrap/extconf.rb CHANGED
@@ -10,4 +10,19 @@ have_library 'urcu-bp' or abort 'liburcu-bp not found'
 have_library 'dl'
 have_library 'c'
 have_library 'execinfo' # FreeBSD
+
+if try_link(<<'')
+int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+end
+
+if try_link(<<'')
+int main(int a) { return __builtin_add_overflow(0,0,&a); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+else
+  abort 'missing __builtin_add_overflow'
+end
+
 create_makefile 'mwrap'
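The additions above use mkmf's try_link with an empty-delimiter heredoc (<<''), which is terminated by the first blank line, so the $defs append is the body of the `if'. A self-contained sketch of the same detection pattern; the probed builtin and macro name here are invented for illustration:

	require 'mkmf'

	if try_link(<<'')
	int main(int a) { return __builtin_mul_overflow(0,0,&a); }

	  $defs << '-DHAVE_BUILTIN_MUL_OVERFLOW' # hypothetical macro
	end

	create_makefile 'example' # placeholder extension name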
data/ext/mwrap/mwrap.c CHANGED
@@ -16,26 +16,40 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <pthread.h>
 #include <urcu-bp.h>
 #include <urcu/rculfhash.h>
+#include <urcu/rculist.h>
 #include "jhash.h"
 
 static ID id_uminus;
+static unsigned int track_memalign;
 const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
-
+extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
+extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
+extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
+extern size_t __attribute__((weak)) rb_gc_count(void);
+extern VALUE __attribute__((weak)) rb_cObject;
+extern VALUE __attribute__((weak)) rb_yield(VALUE);
+
+/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
+
+int __attribute__((weak)) ruby_thread_has_gvl_p(void)
+{
+	return 0;
+}
+
 #ifdef __FreeBSD__
 void *__malloc(size_t);
-void
-
-
-static void *(*real_calloc)(size_t, size_t) = __calloc;
-static void *(*real_realloc)(void *, size_t) = __realloc;
-# define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+void __free(void *);
+# define real_malloc __malloc
+# define real_free __free
 #else
-static int ready;
 static void *(*real_malloc)(size_t);
-static void
-static
+static void (*real_free)(void *);
+static int resolving_malloc;
+#endif /* !FreeBSD */
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -43,22 +57,33 @@ static void *(*real_realloc)(void *, size_t);
  * symbol for the jemalloc calloc, yet
  */
 # define RETURN_IF_NOT_READY() do { \
-	if (!
+	if (!real_malloc) { \
 		errno = ENOMEM; \
 		return NULL; \
 	} \
 } while (0)
 
-
+static __thread size_t locating;
+static size_t generation;
+static size_t page_size;
+static struct cds_lfht *totals;
+union padded_mutex {
+	pthread_mutex_t mtx;
+	char pad[64];
+};
 
-/*
-
-
-
-
-
+/* a round-robin pool of mutexes */
+#define MUTEX_NR   (1 << 6)
+#define MUTEX_MASK (MUTEX_NR - 1)
+static size_t mutex_i;
+static union padded_mutex mutexes[MUTEX_NR] = {
+	[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+};
 
-static
+static pthread_mutex_t *mutex_assign(void)
+{
+	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+}
 
 static struct cds_lfht *
 lfht_new(void)
@@ -69,19 +94,40 @@ lfht_new(void)
 __attribute__((constructor)) static void resolve_malloc(void)
 {
 	int err;
+	const char *opt;
+	++locating;
 
-#
-
-
-
-
-
-
+#ifdef __FreeBSD__
+	/*
+	 * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+	 * which happens at pthread_mutex_lock, and that calls calloc
+	 */
+	{
+		size_t i;
+
+		for (i = 0; i < MUTEX_NR; i++) {
+			err = pthread_mutex_init(&mutexes[i].mtx, 0);
+			if (err) {
+				fprintf(stderr, "error: %s\n", strerror(err));
+				_exit(1);
+			}
+		}
+		/* initialize mutexes used by urcu-bp */
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+#else /* !FreeBSD (tested on GNU/Linux) */
+	if (!real_malloc) {
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
+	}
+	real_free = dlsym(RTLD_NEXT, "free");
+	if (!real_malloc || !real_free) {
+		fprintf(stderr, "missing malloc/aligned_alloc/free\n"
+			"\t%p %p\n", real_malloc, real_free);
 		_exit(1);
 	}
-
-#endif
-
+#endif /* !FreeBSD */
 	totals = lfht_new();
 	if (!totals)
 		fprintf(stderr, "failed to allocate totals table\n");
@@ -91,14 +137,27 @@ __attribute__((constructor)) static void resolve_malloc(void)
 			call_rcu_after_fork_child);
 	if (err)
 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+	page_size = sysconf(_SC_PAGESIZE);
+	opt = getenv("MWRAP");
+	if (opt && (opt = strstr(opt, "memalign:"))) {
+		if (!sscanf(opt, "memalign:%u", &track_memalign))
+			fprintf(stderr, "not an unsigned int: %s\n", opt);
+	}
+	--locating;
+}
 
-
+static void
+mutex_lock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_lock(m);
+	assert(err == 0);
+}
 
-
-
-
-
-
+static void
+mutex_unlock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_unlock(m);
+	assert(err == 0);
 }
 
 #ifndef HAVE_MEMPCPY
@@ -114,8 +173,6 @@ my_mempcpy(void *dest, const void *src, size_t n)
 #define RETURN_ADDRESS(nr) \
   (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
-static __thread size_t locating;
-
 #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
 static char *int2str(int num, char *dst, size_t * size)
 {
@@ -143,21 +200,58 @@ static char *int2str(int num, char *dst, size_t * size)
 		return NULL;
 }
 
+/*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
 static int has_ec_p(void)
 {
-	return (
+	return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
+		ruby_current_execution_context_ptr);
 }
 
+/* allocated via real_malloc/real_free */
 struct src_loc {
-
-	size_t calls;
+	pthread_mutex_t *mtx;
 	size_t total;
+	size_t allocations;
+	size_t frees;
+	size_t age_total; /* (age_total / frees) => mean age at free */
+	size_t max_lifespan;
 	struct cds_lfht_node hnode;
+	struct cds_list_head allocs; /* <=> alloc_hdr.node */
 	uint32_t hval;
 	uint32_t capa;
 	char k[];
 };
 
+/* every allocation has this in the header, maintain alignment with malloc */
+struct alloc_hdr {
+	struct cds_list_head anode; /* <=> src_loc.allocs */
+	union {
+		struct {
+			size_t gen; /* rb_gc_count() */
+			struct src_loc *loc;
+		} live;
+		struct rcu_head dead;
+	} as;
+	void *real; /* what to call real_free on */
+	size_t size;
+};
+
+static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
+
+static struct alloc_hdr *ptr2hdr(void *p)
+{
+	return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+}
+
+static void *hdr2ptr(struct alloc_hdr *h)
+{
+	return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+}
+
 static int loc_is_addr(const struct src_loc *l)
 {
 	return l->capa == 0;
@@ -180,16 +274,14 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 		memcmp(k->k, existing->k, loc_size(k)) == 0);
 }
 
-static
+static struct src_loc *totals_add_rcu(struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
-	struct src_loc *l;
+	struct src_loc *l = 0;
 	struct cds_lfht *t;
 
-
 again:
-	rcu_read_lock();
 	t = rcu_dereference(totals);
 	if (!t) goto out_unlock;
 	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
@@ -197,44 +289,58 @@ again:
 	if (cur) {
 		l = caa_container_of(cur, struct src_loc, hnode);
 		uatomic_add(&l->total, k->total);
-		uatomic_add(&l->
+		uatomic_add(&l->allocations, 1);
 	} else {
 		size_t n = loc_size(k);
-		l =
+		l = real_malloc(sizeof(*l) + n);
 		if (!l) goto out_unlock;
-
 		memcpy(l, k, sizeof(*l) + n);
-		l->
+		l->mtx = mutex_assign();
+		l->age_total = 0;
+		l->max_lifespan = 0;
+		l->frees = 0;
+		l->allocations = 1;
+		CDS_INIT_LIST_HEAD(&l->allocs);
 		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
-
+			real_free(l);
+			rcu_read_lock();
 			goto again;
 		}
 	}
 out_unlock:
-
+	return l;
+}
+
+static void update_stats_rcu_unlock(const struct src_loc *l)
+{
+	if (caa_likely(l)) rcu_read_unlock();
 }
 
-static
+static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
 {
-	struct src_loc *k;
+	struct src_loc *k, *ret = 0;
 	static const size_t xlen = sizeof(caller);
 	char *dst;
 
+	if (caa_unlikely(!totals)) return 0;
 	if (locating++) goto out; /* do not recurse into another *alloc */
 
-
+	rcu_read_lock();
+	if (has_ec_p()) {
 		int line;
 		const char *ptr = rb_source_location_cstr(&line);
 		size_t len;
 		size_t int_size = INT2STR_MAX;
 
+		generation = rb_gc_count();
+
 		if (!ptr) goto unknown;
 
 		/* avoid vsnprintf or anything which could call malloc here: */
 		len = strlen(ptr);
-		k =
+		k = (void *)kbuf;
 		k->total = size;
 		dst = mempcpy(k->k, ptr, len);
 		*dst++ = ':';
@@ -243,7 +349,7 @@ static void update_stats(size_t size, uintptr_t caller)
 		*dst = 0;	/* terminate string */
 		k->capa = (uint32_t)(dst - k->k + 1);
 		k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-
+		ret = totals_add_rcu(k);
 	} else {
 		rb_bug("bad math making key from location %s:%d\n",
 			ptr, line);
@@ -255,36 +361,281 @@ unknown:
 		memcpy(k->k, &caller, xlen);
 		k->capa = 0;
 		k->hval = jhash(k->k, xlen, 0xdeadbeef);
-
+		ret = totals_add_rcu(k);
 	}
 out:
 	--locating;
+	return ret;
+}
+
+size_t malloc_usable_size(void *p)
+{
+	return ptr2hdr(p)->size;
+}
+
+static void
+free_hdr_rcu(struct rcu_head *dead)
+{
+	struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+	real_free(h->real);
+}
+
+void free(void *p)
+{
+	if (p) {
+		struct alloc_hdr *h = ptr2hdr(p);
+		struct src_loc *l = h->as.live.loc;
+
+		if (!real_free) return; /* oh well, leak a little */
+		if (l) {
+			size_t age = generation - h->as.live.gen;
+
+			uatomic_set(&h->size, 0);
+			uatomic_add(&l->frees, 1);
+			uatomic_add(&l->age_total, age);
+
+			mutex_lock(l->mtx);
+			cds_list_del_rcu(&h->anode);
+			if (age > l->max_lifespan)
+				l->max_lifespan = age;
+			mutex_unlock(l->mtx);
+
+			call_rcu(&h->as.dead, free_hdr_rcu);
+		}
+		else {
+			real_free(h->real);
+		}
+	}
+}
+
+static void
+alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+{
+	/* we need src_loc to remain alive for the duration of this call */
+	if (!h) return;
+	h->size = size;
+	h->real = real;
+	h->as.live.loc = l;
+	h->as.live.gen = generation;
+	if (l) {
+		mutex_lock(l->mtx);
+		cds_list_add_rcu(&h->anode, &l->allocs);
+		mutex_unlock(l->mtx);
+	}
+}
+
+static size_t size_align(size_t size, size_t alignment)
+{
+	return ((size + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool ptr_is_aligned(void *ptr, size_t alignment)
+{
+	return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+static void *ptr_align(void *ptr, size_t alignment)
+{
+	return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+static int
+internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+{
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	void *real;
+	size_t asize;
+	size_t d = alignment / sizeof(void*);
+	size_t r = alignment % sizeof(void*);
+
+	if (!real_malloc) return ENOMEM;
+
+	if (r != 0 || d == 0 || !is_power_of_two(d))
+		return EINVAL;
+
+	if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+		void *p = malloc(size);
+		if (!p) return ENOMEM;
+		*pp = p;
+		return 0;
+	}
+	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+		; /* double alignment until >= sizeof(struct alloc_hdr) */
+	if (__builtin_add_overflow(size, alignment, &asize) ||
+	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
+		return ENOMEM;
+
+	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
+	l = track_memalign ? update_stats_rcu_lock(size, caller) : 0;
+	real = real_malloc(asize);
+	if (real) {
+		void *p = hdr2ptr(real);
+		if (!ptr_is_aligned(p, alignment))
+			p = ptr_align(p, alignment);
+		h = ptr2hdr(p);
+		alloc_insert_rcu(l, h, size, real);
+		*pp = p;
+	}
+	update_stats_rcu_unlock(l);
+
+	return real ? 0 : ENOMEM;
+}
+
+static void *
+memalign_result(int err, void *p)
+{
+	if (caa_unlikely(err)) {
+		errno = err;
+		return 0;
+	}
+	return p;
+}
+
+void *memalign(size_t alignment, size_t size)
+{
+	void *p;
+	int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
+}
+
+int posix_memalign(void **p, size_t alignment, size_t size)
+{
+	return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
+}
+
+void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+void cfree(void *) __attribute__((alias("free")));
+
+void *valloc(size_t size)
+{
+	void *p;
+	int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
+}
+
+#if __GNUC__ < 7
+# define add_overflow_p(a,b) __extension__({ \
+		__typeof__(a) _c; \
+		__builtin_add_overflow(a,b,&_c); \
+	})
+#else
+# define add_overflow_p(a,b) \
+		__builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+#endif
+
+void *pvalloc(size_t size)
+{
+	size_t alignment = page_size;
+	void *p;
+	int err;
+
+	if (add_overflow_p(size, alignment)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	size = size_align(size, alignment);
+	err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
 }
 
-/*
- * Do we care for *memalign?  ruby/gc.c uses it in ways this lib
- * doesn't care about, but maybe some gems use it, too.
- */
 void *malloc(size_t size)
 {
-
-
-
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+	void *p;
+
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+		goto enomem;
+
+	/*
+	 * Needed for C++ global declarations using "new",
+	 * which happens before our constructor
+	 */
+#ifndef __FreeBSD__
+	if (!real_malloc) {
+		if (resolving_malloc) goto enomem;
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
+	}
+#endif
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (h) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	update_stats_rcu_unlock(l);
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
+enomem:
+	errno = ENOMEM;
+	return 0;
 }
 
 void *calloc(size_t nmemb, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (__builtin_mul_overflow(size, nmemb, &size)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-
-
-
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+		memset(p, 0, size);
+	}
+	update_stats_rcu_unlock(l);
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 void *realloc(void *ptr, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (!size) {
+		free(ptr);
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-
-
+
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	update_stats_rcu_unlock(l);
+
+	if (ptr && p) {
+		struct alloc_hdr *old = ptr2hdr(ptr);
+		memcpy(p, ptr, old->size < size ? old->size : size);
+		free(ptr);
+	}
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 struct dump_arg {
@@ -314,7 +665,7 @@ static void *dump_to_file(void *x)
 			p = s[0];
 		}
 		fprintf(a->fp, "%16zu %12zu %s\n",
-			l->total, l->
+			l->total, l->allocations, (const char *)p);
 		if (s) free(s);
 	}
 out_unlock:
@@ -349,6 +700,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 		io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
 
 	a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+	io = rb_io_get_io(io);
 	io = rb_io_get_write_io(io);
 	GetOpenFile(io, fptr);
 	a.fp = rb_io_stdio_file(fptr);
@@ -358,49 +710,6 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 	return Qnil;
 }
 
-static void
-free_src_loc(struct rcu_head *head)
-{
-	struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
-	free(l);
-}
-
-static void *totals_clear(void *ign)
-{
-	struct cds_lfht *new, *old;
-	struct cds_lfht_iter iter;
-	struct src_loc *l;
-
-	new = lfht_new();
-	rcu_read_lock();
-	old = rcu_dereference(totals);
-	rcu_assign_pointer(totals, new);
-	cds_lfht_for_each_entry(old, &iter, l, hnode) {
-		cds_lfht_del(old, &l->hnode);
-		call_rcu(&l->rcu_head, free_src_loc);
-	}
-	rcu_read_unlock();
-
-	synchronize_rcu(); /* ensure totals points to new */
-	cds_lfht_destroy(old, NULL);
-	return 0;
-}
-
-/*
- * call-seq:
- *
- *	Mwrap.clear -> nil
- *
- * Atomically replaces the totals table and destroys the old one.
- * This resets all statistics.  It is more expensive than `Mwrap.reset'
- * as new allocations will need to be made to repopulate the new table.
- */
-static VALUE mwrap_clear(VALUE mod)
-{
-	rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
-	return Qnil;
-}
-
 static void *totals_reset(void *ign)
 {
 	struct cds_lfht *t;
@@ -411,7 +720,10 @@ static void *totals_reset(void *ign)
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
 		uatomic_set(&l->total, 0);
-		uatomic_set(&l->
+		uatomic_set(&l->allocations, 0);
+		uatomic_set(&l->frees, 0);
+		uatomic_set(&l->age_total, 0);
+		uatomic_set(&l->max_lifespan, 0);
 	}
 	rcu_read_unlock();
 	return 0;
@@ -423,8 +735,8 @@ static void *totals_reset(void *ign)
  *	Mwrap.reset -> nil
  *
  * Resets the total tables by zero-ing all counters.
- * This resets all statistics
- *
+ * This resets all statistics.  This is not an atomic operation
+ * as other threads (outside of GVL) may increment counters.
  */
 static VALUE mwrap_reset(VALUE mod)
 {
@@ -432,13 +744,40 @@ static VALUE mwrap_reset(VALUE mod)
 	return Qnil;
 }
 
-
+/* :nodoc: */
+static VALUE mwrap_clear(VALUE mod)
+{
+	return mwrap_reset(mod);
+}
+
+static VALUE rcu_unlock_ensure(VALUE ignored)
 {
 	rcu_read_unlock();
 	--locating;
 	return Qfalse;
 }
 
+static VALUE location_string(struct src_loc *l)
+{
+	VALUE ret, tmp;
+
+	if (loc_is_addr(l)) {
+		char **s = backtrace_symbols((void *)l->k, 1);
+		tmp = rb_str_new_cstr(s[0]);
+		free(s);
+	}
+	else {
+		tmp = rb_str_new(l->k, l->capa - 1);
+	}
+
+	/* deduplicate and try to free up some memory */
+	ret = rb_funcall(tmp, id_uminus, 0);
+	if (!OBJ_FROZEN_RAW(tmp))
+		rb_str_resize(tmp, 0);
+
+	return ret;
+}
+
 static VALUE dump_each_rcu(VALUE x)
 {
 	struct dump_arg *a = (struct dump_arg *)x;
@@ -448,27 +787,17 @@ static VALUE dump_each_rcu(VALUE x)
 
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
-		VALUE v[
+		VALUE v[6];
 		if (l->total <= a->min) continue;
 
-
-			char **s = backtrace_symbols((void *)l->k, 1);
-			v[1] = rb_str_new_cstr(s[0]);
-			free(s);
-		}
-		else {
-			v[1] = rb_str_new(l->k, l->capa - 1);
-		}
-
-		/* deduplicate and try to free up some memory */
-		v[0] = rb_funcall(v[1], id_uminus, 0);
-		if (!OBJ_FROZEN_RAW(v[1]))
-			rb_str_resize(v[1], 0);
-
+		v[0] = location_string(l);
 		v[1] = SIZET2NUM(l->total);
-		v[2] = SIZET2NUM(l->
+		v[2] = SIZET2NUM(l->allocations);
+		v[3] = SIZET2NUM(l->frees);
+		v[4] = SIZET2NUM(l->age_total);
+		v[5] = SIZET2NUM(l->max_lifespan);
 
-		rb_yield_values2(
+		rb_yield_values2(6, v);
 		assert(rcu_read_ongoing());
 	}
 	return Qnil;
@@ -477,10 +806,12 @@ static VALUE dump_each_rcu(VALUE x)
 /*
  * call-seq:
  *
- *
+ *	Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ *	  ...
+ *	end
  *
  * Yields each entry of the table to a caller-supplied block.
- * +min+ may be specified to filter out lines with +
+ * +min+ may be specified to filter out lines with +total+ bytes
  * equal-to-or-smaller-than the supplied minimum.
  */
 static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
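A sketch of consuming the six values documented in the call-seq above (the 16384-byte threshold is arbitrary):

	require 'mwrap'

	Mwrap.each(16384) do |location, total, allocations, frees, age_total, max_lifespan|
	  live = allocations - frees # allocations not yet freed at this call site
	  mean_age = frees.zero? ? Float::INFINITY : age_total.to_f / frees
	  printf("%s: %d bytes/%d calls, %d live, mean age %.1f GCs (max %d)\n",
	         location, total, allocations, live, mean_age, max_lifespan)
	end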
@@ -494,7 +825,212 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
 	++locating;
 	rcu_read_lock();
 
-	return rb_ensure(dump_each_rcu, (VALUE)&a,
+	return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
+}
+
+static size_t
+src_loc_memsize(const void *p)
+{
+	return sizeof(struct src_loc);
+}
+
+static const rb_data_type_t src_loc_type = {
+	"source_location",
+	/* no marking, no freeing */
+	{ 0, 0, src_loc_memsize, /* reserved */ },
+	/* parent, data, [ flags ] */
+};
+
+static VALUE cSrcLoc;
+
+static int
+extract_addr(const char *str, size_t len, void **p)
+{
+	const char *c;
+#if defined(__GLIBC__)
+	return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
+#else /* tested FreeBSD */
+	return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+#endif
+}
+
+/*
+ * call-seq:
+ *	Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Returns the associated Mwrap::SourceLocation given the +location+
+ * String.  +location+ is either a Ruby source location path:line
+ * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
+ * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
+ */
+static VALUE mwrap_aref(VALUE mod, VALUE loc)
+{
+	const char *str = StringValueCStr(loc);
+	int len = RSTRING_LENINT(loc);
+	struct src_loc *k = 0;
+	uintptr_t p;
+	struct cds_lfht_iter iter;
+	struct cds_lfht_node *cur;
+	struct cds_lfht *t;
+	struct src_loc *l;
+	VALUE val = Qnil;
+
+	if (extract_addr(str, len, (void **)&p)) {
+		k = (void *)kbuf;
+		memcpy(k->k, &p, sizeof(p));
+		k->capa = 0;
+		k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+	} else {
+		k = (void *)kbuf;
+		memcpy(k->k, str, len + 1);
+		k->capa = len + 1;
+		k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+	}
+
+	if (!k) return val;
+
+	rcu_read_lock();
+	t = rcu_dereference(totals);
+	if (!t) goto out_unlock;
+
+	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+	cur = cds_lfht_iter_get_node(&iter);
+	if (cur) {
+		l = caa_container_of(cur, struct src_loc, hnode);
+		val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
+	}
+out_unlock:
+	rcu_read_unlock();
+	return val;
+}
+
+static VALUE src_loc_each_i(VALUE p)
+{
+	struct alloc_hdr *h;
+	struct src_loc *l = (struct src_loc *)p;
+
+	cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
+		size_t gen = uatomic_read(&h->as.live.gen);
+		size_t size = uatomic_read(&h->size);
+
+		if (size) {
+			VALUE v[2];
+			v[0] = SIZET2NUM(size);
+			v[1] = SIZET2NUM(gen);
+
+			rb_yield_values2(2, v);
+		}
+	}
+
+	return Qfalse;
+}
+
+static struct src_loc *src_loc_get(VALUE self)
+{
+	struct src_loc *l;
+	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+	assert(l);
+	return l;
+}
+
+/*
+ * call-seq:
+ *	loc = Mwrap[location]
+ *	loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.
+ *
+ * This functionality is only available in mwrap 2.0.0+
+ */
+static VALUE src_loc_each(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+
+	assert(locating == 0 && "forgot to clear locating");
+	++locating;
+	rcu_read_lock();
+	rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+	return self;
+}
+
+/*
+ * The mean lifespan (in GC generations) of allocations made from this
+ * location.  This does not account for live allocations.
+ */
+static VALUE src_loc_mean_lifespan(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	size_t tot, frees;
+
+	frees = uatomic_read(&l->frees);
+	tot = uatomic_read(&l->age_total);
+	return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
+}
+
+/* The number of frees made from this location */
+static VALUE src_loc_frees(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+}
+
+/* The number of allocations made from this location */
+static VALUE src_loc_allocations(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+}
+
+/* The total number of bytes allocated from this location */
+static VALUE src_loc_total(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+}
+
+/*
+ * The maximum age (in GC generations) of an allocation before it was freed.
+ * This does not account for live allocations.
+ */
+static VALUE src_loc_max_lifespan(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+}
+
+/*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+static VALUE src_loc_name(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	VALUE ret;
+
+	++locating;
+	ret = location_string(l);
+	--locating;
+	return ret;
+}
+
+static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
+
+/*
+ * call-seq:
+ *
+ *	Mwrap.quiet do |depth|
+ *	  # expensive sort/calculate/emitting results of Mwrap.each
+ *	  # affecting statistics of the rest of the app
+ *	end
+ *
+ * Stops allocation tracking inside the block.  This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.
+ *
+ * This feature was added in mwrap 2.0.0+
+ */
+static VALUE mwrap_quiet(VALUE mod)
+{
+	size_t cur = ++locating;
+	return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
 }
 
 /*
@@ -515,19 +1051,47 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
  * * dump_fd: a writable FD to dump to
  * * dump_path: a path to dump to, the file is opened in O_APPEND mode
  * * dump_min: the minimum allocation size (total) to dump
+ * * memalign: use `1' to enable tracking the memalign family
  *
  * If both `dump_fd' and `dump_path' are specified, dump_path takes
  * precedence.
+ *
+ * Tracking the memalign family of functions is misleading for Ruby
+ * applications, as heap page allocations can happen anywhere a
+ * Ruby object is allocated, even in the coldest code paths.
+ * Furthermore, it is rarely-used outside of the Ruby object allocator.
+ * Thus tracking memalign functions is disabled by default.
  */
 void Init_mwrap(void)
 {
-	VALUE mod
+	VALUE mod;
+
+	++locating;
+	mod = rb_define_module("Mwrap");
 	id_uminus = rb_intern("-@");
 
+	/*
+	 * Represents a location in source code or library
+	 * address which calls a memory allocation.  It is
+	 * updated automatically as allocations are made, so
+	 * there is no need to reload or reread it from Mwrap#[].
+	 * This class is only available since mwrap 2.0.0+.
+	 */
+	cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
 	rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
-	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
+	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+	rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
+	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+	rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+	rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+	rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+	rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+	rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+	rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+	--locating;
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
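Tying together the 2.0.0 API registered above: Mwrap.[] looks up a SourceLocation, its accessors expose the per-site counters, and Mwrap.quiet keeps the reporting code itself out of the statistics. The location string below is hypothetical:

	require 'mwrap'

	Mwrap.quiet do
	  if (loc = Mwrap['/path/to/foo.rb:5']) # hypothetical call site
	    puts "#{loc.name}: #{loc.total} bytes over #{loc.allocations} allocations"
	    puts "mean lifespan: #{loc.mean_lifespan} GCs (max #{loc.max_lifespan})"
	    # each live allocation: size in bytes and GC.count at allocation time
	    loc.each { |size, generation| puts "  #{size} bytes, gen #{generation}" }
	  end
	end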
@@ -538,18 +1102,18 @@ void Init_mwrap(void)
 __attribute__ ((destructor))
 static void mwrap_dump_destructor(void)
 {
-
-
-
-
-
+	const char *opt = getenv("MWRAP");
+	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+	struct dump_arg a;
+	size_t i;
+	int dump_fd;
 	char *dump_path;
 
 	if (!opt)
 		return;
 
-
-
+	++locating;
+	if ((dump_path = strstr(opt, "dump_path:")) &&
 	    (dump_path += sizeof("dump_path")) &&
 	    *dump_path) {
 		char *end = strchr(dump_path, ',');
@@ -594,5 +1158,5 @@ static void mwrap_dump_destructor(void)
 	}
 	dump_to_file(&a);
 out:
-
+	--locating;
 }