mwrap 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +2 -0
- data/MANIFEST +1 -0
- data/README +31 -21
- data/bin/mwrap +7 -0
- data/ext/mwrap/extconf.rb +15 -0
- data/ext/mwrap/mwrap.c +710 -146
- data/mwrap.gemspec +1 -1
- data/test/test_mwrap.rb +126 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8559afec1946b7f545c944085aac5e205601deb82fa9f1529785dd3ef7526e5a
+  data.tar.gz: 34ea90410103ec59f367baa00e1e4df6424fe67af8608b5c7a3a0cb66fec2440
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ff91f6f250e7a18cb465a6dc04ddee374411a10a92cd1b3227e09a4ccd0be1b8597f475f71d5061bd5bc36e8c0418ce3bf6faa3a78b37aec5d21b0340f736031
+  data.tar.gz: 06311f2949ae4dae6bd2d0e95e6b0d147b44b6bae6cd4464e4c1d160fc1f5f3f99b2a6a3a3e711a10b8894da3bdb6220ccfeb72dfb18545170edbbc65dbad225
data/.document
ADDED
data/MANIFEST
CHANGED
data/README
CHANGED
@@ -4,20 +4,23 @@ mwrap is designed to answer the question:
 
    Which lines of Ruby are hitting malloc the most?
 
-mwrap wraps all malloc
-
-
-
-
-
-
-
+mwrap wraps all malloc-family calls to trace the Ruby source
+location of such calls and bytes allocated at each callsite.
+As of mwrap 2.0.0, it can also function as a leak detector
+and show live allocations at every call site.  Depending on
+your application and workload, the overhead is roughly a 50%
+increase memory and runtime.
+
+It works best for allocations under GVL, but tries to track
+numeric caller addresses for allocations made without GVL so you
+can get an idea of how much memory usage certain extensions and
+native libraries use.
 
 It requires the concurrent lock-free hash table from the
 Userspace RCU project: https://liburcu.org/
 
-It does not require recompiling or rebuilding Ruby, but only
-Ruby trunk (2.6.0dev+) on a few platforms:
+It does not require recompiling or rebuilding Ruby, but only
+supports Ruby trunk (2.6.0dev+) on a few platforms:
 
 * GNU/Linux
 * FreeBSD (tested 11.1)
@@ -36,28 +39,35 @@ It may work on NetBSD, OpenBSD and DragonFly BSD.
 == Usage
 
 mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to
-improve ease-of-use.  You can set
-variable to
-
-	# Dump results to stderr at exit:
-	MWRAP=dump_fd:2 mwrap RUBY_COMMAND
-
-You may also set dump_path to append to a log file:
+improve ease-of-use.  You can set dump_path: in the MWRAP environment
+variable to append the results to a log file:
 
 	MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND
 
-
-
+	# And to display the locations with the most allocations:
+	sort -k1,1rn </path/to/log | $PAGER
+
+You may also `require "mwrap"' in your Ruby code and use
+Mwrap.dump, Mwrap.reset, Mwrap.each, etc.
 
 However, mwrap MUST be loaded via LD_PRELOAD to have any
-effect in tracking malloc use.
+effect in tracking malloc use.  However, it is safe to keep
+"require 'mwrap'" in performance-critical deployments,
+as overhead is only incurred when used as an LD_PRELOAD.
 
 The output of the mwrap dump is a text file with 3 columns:
 
 	total_bytes	call_count	location
 
 Where location is a Ruby source location (if made under GVL)
-or an address retrieved by backtrace_symbols(3)
+or an address retrieved by backtrace_symbols(3).  It is
+recommended to use the sort(1) command on either of the
+first two columns to find the hottest malloc locations.
+
+mwrap 2.0.0+ also supports a Rack application endpoint,
+it is documented at:
+
+	https://80x24.org/mwrap/MwrapRack.html
 
 == Known problems
 
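A quick sketch of the Ruby-side API the new README text mentions (hedged: the `(io, min)` argument order follows the mwrap_dump argument handling visible in mwrap.c below, and real numbers only appear when the process runs under the mwrap LD_PRELOAD wrapper):

	# run as: mwrap ruby example.rb
	require 'mwrap'
	1000.times { 'x' * 1024 }   # generate some malloc traffic
	Mwrap.dump($stderr, 4096)   # report callsites with total_bytes > 4096
	Mwrap.reset                 # then zero all counters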
data/bin/mwrap
CHANGED
@@ -26,4 +26,11 @@ if ENV['MWRAP'] =~ /dump_fd:(\d+)/
     opts[dump_fd] = dump_io
   end
 end
+
+# allow inheriting FDs from systemd
+n = ENV['LISTEN_FDS']
+if n && ENV['LISTEN_PID'].to_i == $$
+  n = 3 + n.to_i
+  (3...n).each { |fd| opts[fd] = IO.new(fd) }
+end
 exec *ARGV, opts
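The hunk above follows the systemd socket-activation convention: inherited sockets start at file descriptor 3, and LISTEN_PID names the process they were meant for.  The `fd => IO` pairs matter because Ruby marks the descriptors it creates close-on-exec, so anything not explicitly redirected is lost across exec; mapping a descriptor to itself keeps it open.  A hedged, self-contained demonstration with spawn:

	r, w = IO.pipe
	pid = spawn('ruby', '-e', 'IO.new(Integer(ARGV[0]), "w") << "inherited"',
	            w.fileno.to_s, w.fileno => w)  # the mapping keeps the FD open
	w.close
	puts r.read       # => inherited
	Process.wait(pid)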
data/ext/mwrap/extconf.rb
CHANGED
@@ -10,4 +10,19 @@ have_library 'urcu-bp' or abort 'liburcu-bp not found'
 have_library 'dl'
 have_library 'c'
 have_library 'execinfo' # FreeBSD
+
+if try_link(<<'')
+int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+end
+
+if try_link(<<'')
+int main(int a) { return __builtin_add_overflow(0,0,&a); }
+
+  $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
+else
+  abort 'missing __builtin_add_overflow'
+end
+
 create_makefile 'mwrap'
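A note on the `<<''` idiom in that hunk: a heredoc with an empty delimiter ends at the first blank line, so each C probe needs no explicit terminator, and the `$defs <<` line after the blank is already ordinary Ruby again.  A standalone illustration:

	src = <<''
	int main(void) { return 0; }

	puts src.lines.size   # => 1; the blank line ended the heredoc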
data/ext/mwrap/mwrap.c
CHANGED
@@ -16,26 +16,40 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <pthread.h>
 #include <urcu-bp.h>
 #include <urcu/rculfhash.h>
+#include <urcu/rculist.h>
 #include "jhash.h"
 
 static ID id_uminus;
+static unsigned int track_memalign;
 const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
-
+extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
+extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
+extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
+extern size_t __attribute__((weak)) rb_gc_count(void);
+extern VALUE __attribute__((weak)) rb_cObject;
+extern VALUE __attribute__((weak)) rb_yield(VALUE);
+
+/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
+
+int __attribute__((weak)) ruby_thread_has_gvl_p(void)
+{
+	return 0;
+}
+
 #ifdef __FreeBSD__
 void *__malloc(size_t);
-void
-
-
-static void *(*real_calloc)(size_t, size_t) = __calloc;
-static void *(*real_realloc)(void *, size_t) = __realloc;
-# define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+void __free(void *);
+# define real_malloc __malloc
+# define real_free __free
 #else
-static int ready;
 static void *(*real_malloc)(size_t);
-static void
-static
+static void (*real_free)(void *);
+static int resolving_malloc;
+#endif /* !FreeBSD */
 
 /*
  * we need to fake an OOM condition while dlsym is running,
@@ -43,22 +57,33 @@ static void *(*real_realloc)(void *, size_t);
  * symbol for the jemalloc calloc, yet
  */
 # define RETURN_IF_NOT_READY() do { \
-	if (!
+	if (!real_malloc) { \
 		errno = ENOMEM; \
 		return NULL; \
 	} \
 } while (0)
 
-
+static __thread size_t locating;
+static size_t generation;
+static size_t page_size;
+static struct cds_lfht *totals;
+union padded_mutex {
+	pthread_mutex_t mtx;
+	char pad[64];
+};
 
-/*
-
-
-
-
-
+/* a round-robin pool of mutexes */
+#define MUTEX_NR   (1 << 6)
+#define MUTEX_MASK (MUTEX_NR - 1)
+static size_t mutex_i;
+static union padded_mutex mutexes[MUTEX_NR] = {
+	[0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+};
 
-static
+static pthread_mutex_t *mutex_assign(void)
+{
+	return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+}
 
 static struct cds_lfht *
 lfht_new(void)
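Those additions give every callsite a lock from a fixed pool instead of a mutex of its own: `mutex_assign` hands out one of 64 cache-line-padded mutexes round-robin via an atomic counter.  A rough Ruby analogue of the idea (hedged; a plain integer stands in for uatomic_add_return):

	MUTEX_NR = 1 << 6
	POOL = Array.new(MUTEX_NR) { Mutex.new }
	counter = 0
	assign = -> { POOL[(counter += 1) & (MUTEX_NR - 1)] }
	assign.call.synchronize { }  # unrelated callsites share at most 64 locks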
@@ -69,19 +94,40 @@ lfht_new(void)
 __attribute__((constructor)) static void resolve_malloc(void)
 {
 	int err;
+	const char *opt;
+	++locating;
 
-#
-
-
-
-
-
+#ifdef __FreeBSD__
+	/*
+	 * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+	 * which happens at pthread_mutex_lock, and that calls calloc
+	 */
+	{
+		size_t i;
+
+		for (i = 0; i < MUTEX_NR; i++) {
+			err = pthread_mutex_init(&mutexes[i].mtx, 0);
+			if (err) {
+				fprintf(stderr, "error: %s\n", strerror(err));
+				_exit(1);
+			}
+		}
+		/* initialize mutexes used by urcu-bp */
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+#else /* !FreeBSD (tested on GNU/Linux) */
+	if (!real_malloc) {
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
+	}
+	real_free = dlsym(RTLD_NEXT, "free");
+	if (!real_malloc || !real_free) {
+		fprintf(stderr, "missing malloc/aligned_alloc/free\n"
+			"\t%p %p\n", real_malloc, real_free);
 		_exit(1);
 	}
-
-#endif
-
+#endif /* !FreeBSD */
 	totals = lfht_new();
 	if (!totals)
 		fprintf(stderr, "failed to allocate totals table\n");
@@ -91,14 +137,27 @@ __attribute__((constructor)) static void resolve_malloc(void)
 			call_rcu_after_fork_child);
 	if (err)
 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+	page_size = sysconf(_SC_PAGESIZE);
+	opt = getenv("MWRAP");
+	if (opt && (opt = strstr(opt, "memalign:"))) {
+		if (!sscanf(opt, "memalign:%u", &track_memalign))
+			fprintf(stderr, "not an unsigned int: %s\n", opt);
+	}
+	--locating;
+}
 
-
+static void
+mutex_lock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_lock(m);
+	assert(err == 0);
+}
 
-
-
-
-
-
+static void
+mutex_unlock(pthread_mutex_t *m)
+{
+	int err = pthread_mutex_unlock(m);
+	assert(err == 0);
 }
 
 #ifndef HAVE_MEMPCPY
@@ -114,8 +173,6 @@ my_mempcpy(void *dest, const void *src, size_t n)
 #define RETURN_ADDRESS(nr) \
 	(uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
-static __thread size_t locating;
-
 #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
 static char *int2str(int num, char *dst, size_t * size)
 {
@@ -143,21 +200,58 @@ static char *int2str(int num, char *dst, size_t * size)
 		return NULL;
 }
 
+/*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
 static int has_ec_p(void)
 {
-	return (
+	return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
+		ruby_current_execution_context_ptr);
 }
 
+/* allocated via real_malloc/real_free */
 struct src_loc {
-
-	size_t calls;
+	pthread_mutex_t *mtx;
 	size_t total;
+	size_t allocations;
+	size_t frees;
+	size_t age_total; /* (age_total / frees) => mean age at free */
+	size_t max_lifespan;
 	struct cds_lfht_node hnode;
+	struct cds_list_head allocs; /* <=> alloc_hdr.node */
 	uint32_t hval;
 	uint32_t capa;
 	char k[];
 };
 
+/* every allocation has this in the header, maintain alignment with malloc */
+struct alloc_hdr {
+	struct cds_list_head anode; /* <=> src_loc.allocs */
+	union {
+		struct {
+			size_t gen; /* rb_gc_count() */
+			struct src_loc *loc;
+		} live;
+		struct rcu_head dead;
+	} as;
+	void *real; /* what to call real_free on */
+	size_t size;
+};
+
+static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
+
+static struct alloc_hdr *ptr2hdr(void *p)
+{
+	return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+}
+
+static void *hdr2ptr(struct alloc_hdr *h)
+{
+	return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+}
+
 static int loc_is_addr(const struct src_loc *l)
 {
 	return l->capa == 0;
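The new `struct alloc_hdr` is the core of the 2.0.0 leak tracking: every allocation is over-allocated by a header recording the owning callsite, the `rb_gc_count()` generation at allocation time, and the raw pointer to pass to `real_free`; `ptr2hdr`/`hdr2ptr` are inverse offsets of `sizeof(struct alloc_hdr)`.  The bookkeeping sketched in Ruby (hedged; the 40-byte header and addresses are illustrative, not the real struct layout):

	HDR_SIZE = 40                        # stand-in for sizeof(struct alloc_hdr)
	hdr2ptr = ->(h) { h + HDR_SIZE }     # user pointer sits just past the header
	ptr2hdr = ->(p) { p - HDR_SIZE }     # walk back to the bookkeeping data
	real = 0x7f00_0000_1000              # hypothetical real_malloc result
	raise unless ptr2hdr.(hdr2ptr.(real)) == real
	hdr = { gen: 3, size: 1024 }         # as.live.gen at allocation time
	age = 42 - hdr[:gen]                 # at free(): generation - gen,
	                                     # feeding age_total and max_lifespan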
@@ -180,16 +274,14 @@ static int loc_eq(struct cds_lfht_node *node, const void *key)
 		memcmp(k->k, existing->k, loc_size(k)) == 0);
 }
 
-static
+static struct src_loc *totals_add_rcu(struct src_loc *k)
 {
 	struct cds_lfht_iter iter;
 	struct cds_lfht_node *cur;
-	struct src_loc *l;
+	struct src_loc *l = 0;
 	struct cds_lfht *t;
 
-
 again:
-	rcu_read_lock();
 	t = rcu_dereference(totals);
 	if (!t) goto out_unlock;
 	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
@@ -197,44 +289,58 @@ again:
 	if (cur) {
 		l = caa_container_of(cur, struct src_loc, hnode);
 		uatomic_add(&l->total, k->total);
-		uatomic_add(&l->
+		uatomic_add(&l->allocations, 1);
 	} else {
 		size_t n = loc_size(k);
-		l =
+		l = real_malloc(sizeof(*l) + n);
 		if (!l) goto out_unlock;
-
 		memcpy(l, k, sizeof(*l) + n);
-		l->
+		l->mtx = mutex_assign();
+		l->age_total = 0;
+		l->max_lifespan = 0;
+		l->frees = 0;
+		l->allocations = 1;
+		CDS_INIT_LIST_HEAD(&l->allocs);
 		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
 		if (cur != &l->hnode) { /* lost race */
 			rcu_read_unlock();
-
+			real_free(l);
+			rcu_read_lock();
 			goto again;
 		}
 	}
 out_unlock:
-
+	return l;
+}
+
+static void update_stats_rcu_unlock(const struct src_loc *l)
+{
+	if (caa_likely(l)) rcu_read_unlock();
 }
 
-static
+static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
 {
-	struct src_loc *k;
+	struct src_loc *k, *ret = 0;
 	static const size_t xlen = sizeof(caller);
 	char *dst;
 
+	if (caa_unlikely(!totals)) return 0;
 	if (locating++) goto out; /* do not recurse into another *alloc */
 
-
+	rcu_read_lock();
+	if (has_ec_p()) {
 		int line;
 		const char *ptr = rb_source_location_cstr(&line);
 		size_t len;
 		size_t int_size = INT2STR_MAX;
 
+		generation = rb_gc_count();
+
 		if (!ptr) goto unknown;
 
 		/* avoid vsnprintf or anything which could call malloc here: */
 		len = strlen(ptr);
-		k =
+		k = (void *)kbuf;
 		k->total = size;
 		dst = mempcpy(k->k, ptr, len);
 		*dst++ = ':';
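`totals_add_rcu` above is an intern-or-reuse pattern: look the key up in the lock-free table, otherwise insert, and if `cds_lfht_add_unique` reports a lost race against another thread, free the speculative node and retry.  The same shape with a Hash and Mutex standing in for the RCU machinery (hedged; the real code never blocks readers):

	TABLE = {}
	TABLE_LOCK = Mutex.new
	def intern_loc(key)
	  TABLE_LOCK.synchronize do
	    TABLE[key] ||= { total: 0, allocations: 0, frees: 0 }
	  end
	end
	intern_loc('foo.rb:5')[:allocations] += 1
	intern_loc('foo.rb:5')[:total] += 1024   # second call reuses the record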
@@ -243,7 +349,7 @@ static void update_stats(size_t size, uintptr_t caller)
 			*dst = 0;	/* terminate string */
 			k->capa = (uint32_t)(dst - k->k + 1);
 			k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-
+			ret = totals_add_rcu(k);
 		} else {
 			rb_bug("bad math making key from location %s:%d\n",
 				ptr, line);
@@ -255,36 +361,281 @@ unknown:
 		memcpy(k->k, &caller, xlen);
 		k->capa = 0;
 		k->hval = jhash(k->k, xlen, 0xdeadbeef);
-
+		ret = totals_add_rcu(k);
 	}
 out:
 	--locating;
+	return ret;
+}
+
+size_t malloc_usable_size(void *p)
+{
+	return ptr2hdr(p)->size;
+}
+
+static void
+free_hdr_rcu(struct rcu_head *dead)
+{
+	struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+	real_free(h->real);
+}
+
+void free(void *p)
+{
+	if (p) {
+		struct alloc_hdr *h = ptr2hdr(p);
+		struct src_loc *l = h->as.live.loc;
+
+		if (!real_free) return; /* oh well, leak a little */
+		if (l) {
+			size_t age = generation - h->as.live.gen;
+
+			uatomic_set(&h->size, 0);
+			uatomic_add(&l->frees, 1);
+			uatomic_add(&l->age_total, age);
+
+			mutex_lock(l->mtx);
+			cds_list_del_rcu(&h->anode);
+			if (age > l->max_lifespan)
+				l->max_lifespan = age;
+			mutex_unlock(l->mtx);
+
+			call_rcu(&h->as.dead, free_hdr_rcu);
+		}
+		else {
+			real_free(h->real);
+		}
+	}
+}
+
+static void
+alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+{
+	/* we need src_loc to remain alive for the duration of this call */
+	if (!h) return;
+	h->size = size;
+	h->real = real;
+	h->as.live.loc = l;
+	h->as.live.gen = generation;
+	if (l) {
+		mutex_lock(l->mtx);
+		cds_list_add_rcu(&h->anode, &l->allocs);
+		mutex_unlock(l->mtx);
+	}
+}
+
+static size_t size_align(size_t size, size_t alignment)
+{
+	return ((size + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool ptr_is_aligned(void *ptr, size_t alignment)
+{
+	return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+static void *ptr_align(void *ptr, size_t alignment)
+{
+	return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+static int
+internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+{
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	void *real;
+	size_t asize;
+	size_t d = alignment / sizeof(void*);
+	size_t r = alignment % sizeof(void*);
+
+	if (!real_malloc) return ENOMEM;
+
+	if (r != 0 || d == 0 || !is_power_of_two(d))
+		return EINVAL;
+
+	if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+		void *p = malloc(size);
+		if (!p) return ENOMEM;
+		*pp = p;
+		return 0;
+	}
+	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+		; /* double alignment until >= sizeof(struct alloc_hdr) */
+	if (__builtin_add_overflow(size, alignment, &asize) ||
+	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
+		return ENOMEM;
+
+	/* assert(asize == (alignment + size + sizeof(struct alloc_hdr))); */
+	l = track_memalign ? update_stats_rcu_lock(size, caller) : 0;
+	real = real_malloc(asize);
+	if (real) {
+		void *p = hdr2ptr(real);
+		if (!ptr_is_aligned(p, alignment))
+			p = ptr_align(p, alignment);
+		h = ptr2hdr(p);
+		alloc_insert_rcu(l, h, size, real);
+		*pp = p;
+	}
+	update_stats_rcu_unlock(l);
+
+	return real ? 0 : ENOMEM;
+}
+
+static void *
+memalign_result(int err, void *p)
+{
+	if (caa_unlikely(err)) {
+		errno = err;
+		return 0;
+	}
+	return p;
+}
+
+void *memalign(size_t alignment, size_t size)
+{
+	void *p;
+	int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
+}
+
+int posix_memalign(void **p, size_t alignment, size_t size)
+{
+	return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
+}
+
+void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+void cfree(void *) __attribute__((alias("free")));
+
+void *valloc(size_t size)
+{
+	void *p;
+	int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
+}
+
+#if __GNUC__ < 7
+#  define add_overflow_p(a,b) __extension__({ \
+		__typeof__(a) _c; \
+		__builtin_add_overflow(a,b,&_c); \
+	})
+#else
+#  define add_overflow_p(a,b) \
+		__builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+#endif
+
+void *pvalloc(size_t size)
+{
+	size_t alignment = page_size;
+	void *p;
+	int err;
+
+	if (add_overflow_p(size, alignment)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	size = size_align(size, alignment);
+	err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+	return memalign_result(err, p);
 }
 
-/*
- * Do we care for *memalign?  ruby/gc.c uses it in ways this lib
- * doesn't care about, but maybe some gems use it, too.
- */
 void *malloc(size_t size)
 {
-
-
-
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+	void *p;
+
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+		goto enomem;
+
+	/*
+	 * Needed for C++ global declarations using "new",
+	 * which happens before our constructor
+	 */
+#ifndef __FreeBSD__
+	if (!real_malloc) {
+		if (resolving_malloc) goto enomem;
+		resolving_malloc = 1;
+		real_malloc = dlsym(RTLD_NEXT, "malloc");
+	}
+#endif
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (h) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	update_stats_rcu_unlock(l);
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
+enomem:
+	errno = ENOMEM;
+	return 0;
 }
 
 void *calloc(size_t nmemb, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (__builtin_mul_overflow(size, nmemb, &size)) {
+		errno = ENOMEM;
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-
-
-
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+		memset(p, 0, size);
+	}
+	update_stats_rcu_unlock(l);
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 void *realloc(void *ptr, size_t size)
 {
+	void *p;
+	struct src_loc *l;
+	struct alloc_hdr *h;
+	size_t asize;
+
+	if (!size) {
+		free(ptr);
+		return 0;
+	}
+	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+		errno = ENOMEM;
+		return 0;
+	}
 	RETURN_IF_NOT_READY();
-
-
+
+	l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+	p = h = real_malloc(asize);
+	if (p) {
+		alloc_insert_rcu(l, h, size, h);
+		p = hdr2ptr(h);
+	}
+	update_stats_rcu_unlock(l);
+
+	if (ptr && p) {
+		struct alloc_hdr *old = ptr2hdr(ptr);
+		memcpy(p, ptr, old->size < size ? old->size : size);
+		free(ptr);
+	}
+	if (caa_unlikely(!p)) errno = ENOMEM;
+	return p;
 }
 
 struct dump_arg {
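`internal_memalign` above reserves `alignment + size + sizeof(struct alloc_hdr)` bytes so that an aligned user pointer with a header in front always fits, whatever address `real_malloc` returns.  A small Ruby check of that invariant (hedged; 40 is an illustrative header size):

	hdr, align, size = 40, 64, 100
	asize = align + size + hdr
	(0...align).each do |offset|            # every possible allocator phase
	  real = 0x1000 + offset                # hypothetical real_malloc result
	  p0 = real + hdr                       # hdr2ptr(real)
	  p0 += align - (p0 % align) if (p0 % align) != 0   # ptr_align
	  raise 'header underflow' if p0 - hdr < real
	  raise 'user region overflow' if p0 + size > real + asize
	end
	puts 'aligned user region always fits'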
@@ -314,7 +665,7 @@ static void *dump_to_file(void *x)
 			p = s[0];
 		}
 		fprintf(a->fp, "%16zu %12zu %s\n",
-			l->total, l->
+			l->total, l->allocations, (const char *)p);
 		if (s) free(s);
 	}
 out_unlock:
@@ -349,6 +700,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 		io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
 
 	a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+	io = rb_io_get_io(io);
 	io = rb_io_get_write_io(io);
 	GetOpenFile(io, fptr);
 	a.fp = rb_io_stdio_file(fptr);
@@ -358,49 +710,6 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 	return Qnil;
 }
 
-static void
-free_src_loc(struct rcu_head *head)
-{
-	struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
-	free(l);
-}
-
-static void *totals_clear(void *ign)
-{
-	struct cds_lfht *new, *old;
-	struct cds_lfht_iter iter;
-	struct src_loc *l;
-
-	new = lfht_new();
-	rcu_read_lock();
-	old = rcu_dereference(totals);
-	rcu_assign_pointer(totals, new);
-	cds_lfht_for_each_entry(old, &iter, l, hnode) {
-		cds_lfht_del(old, &l->hnode);
-		call_rcu(&l->rcu_head, free_src_loc);
-	}
-	rcu_read_unlock();
-
-	synchronize_rcu(); /* ensure totals points to new */
-	cds_lfht_destroy(old, NULL);
-	return 0;
-}
-
-/*
- * call-seq:
- *
- *	Mwrap.clear -> nil
- *
- * Atomically replaces the totals table and destroys the old one.
- * This resets all statistics. It is more expensive than `Mwrap.reset'
- * as new allocations will need to be made to repopulate the new table.
- */
-static VALUE mwrap_clear(VALUE mod)
-{
-	rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
-	return Qnil;
-}
-
 static void *totals_reset(void *ign)
 {
 	struct cds_lfht *t;
@@ -411,7 +720,10 @@ static void *totals_reset(void *ign)
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
 		uatomic_set(&l->total, 0);
-		uatomic_set(&l->
+		uatomic_set(&l->allocations, 0);
+		uatomic_set(&l->frees, 0);
+		uatomic_set(&l->age_total, 0);
+		uatomic_set(&l->max_lifespan, 0);
 	}
 	rcu_read_unlock();
 	return 0;
@@ -423,8 +735,8 @@ static void *totals_reset(void *ign)
 *	Mwrap.reset -> nil
 *
 * Resets the the total tables by zero-ing all counters.
-* This resets all statistics
-*
+* This resets all statistics.  This is not an atomic operation
+* as other threads (outside of GVL) may increment counters.
 */
 static VALUE mwrap_reset(VALUE mod)
 {
@@ -432,13 +744,40 @@ static VALUE mwrap_reset(VALUE mod)
 	return Qnil;
 }
 
-
+/* :nodoc: */
+static VALUE mwrap_clear(VALUE mod)
+{
+	return mwrap_reset(mod);
+}
+
+static VALUE rcu_unlock_ensure(VALUE ignored)
 {
 	rcu_read_unlock();
 	--locating;
 	return Qfalse;
 }
 
+static VALUE location_string(struct src_loc *l)
+{
+	VALUE ret, tmp;
+
+	if (loc_is_addr(l)) {
+		char **s = backtrace_symbols((void *)l->k, 1);
+		tmp = rb_str_new_cstr(s[0]);
+		free(s);
+	}
+	else {
+		tmp = rb_str_new(l->k, l->capa - 1);
+	}
+
+	/* deduplicate and try to free up some memory */
+	ret = rb_funcall(tmp, id_uminus, 0);
+	if (!OBJ_FROZEN_RAW(tmp))
+		rb_str_resize(tmp, 0);
+
+	return ret;
+}
+
 static VALUE dump_each_rcu(VALUE x)
 {
 	struct dump_arg *a = (struct dump_arg *)x;
@@ -448,27 +787,17 @@ static VALUE dump_each_rcu(VALUE x)
 
 	t = rcu_dereference(totals);
 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
-		VALUE v[
+		VALUE v[6];
 		if (l->total <= a->min) continue;
 
-
-			char **s = backtrace_symbols((void *)l->k, 1);
-			v[1] = rb_str_new_cstr(s[0]);
-			free(s);
-		}
-		else {
-			v[1] = rb_str_new(l->k, l->capa - 1);
-		}
-
-		/* deduplicate and try to free up some memory */
-		v[0] = rb_funcall(v[1], id_uminus, 0);
-		if (!OBJ_FROZEN_RAW(v[1]))
-			rb_str_resize(v[1], 0);
-
+		v[0] = location_string(l);
 		v[1] = SIZET2NUM(l->total);
-		v[2] = SIZET2NUM(l->
+		v[2] = SIZET2NUM(l->allocations);
+		v[3] = SIZET2NUM(l->frees);
+		v[4] = SIZET2NUM(l->age_total);
+		v[5] = SIZET2NUM(l->max_lifespan);
 
-		rb_yield_values2(
+		rb_yield_values2(6, v);
 		assert(rcu_read_ongoing());
 	}
 	return Qnil;
@@ -477,10 +806,12 @@ static VALUE dump_each_rcu(VALUE x)
 /*
  * call-seq:
  *
- *
+ *	Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ *	  ...
+ *	end
 *
 * Yields each entry of the of the table to a caller-supplied block.
-* +min+ may be specified to filter out lines with +
+* +min+ may be specified to filter out lines with +total+ bytes
 * equal-to-or-smaller-than the supplied minimum.
 */
 static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
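A usage sketch of the six-value block signature documented in that hunk (hedged; 4096 is an arbitrary +min+ filter):

	require 'mwrap'  # only reports data under the mwrap LD_PRELOAD wrapper
	Mwrap.each(4096) do |location, total, allocations, frees, age_total, max_lifespan|
	  live = allocations - frees
	  printf("%s: %d bytes, %d live allocs, max lifespan %d GC gens\n",
	         location, total, live, max_lifespan)
	end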
@@ -494,7 +825,212 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
 	++locating;
 	rcu_read_lock();
 
-	return rb_ensure(dump_each_rcu, (VALUE)&a,
+	return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
+}
+
+static size_t
+src_loc_memsize(const void *p)
+{
+	return sizeof(struct src_loc);
+}
+
+static const rb_data_type_t src_loc_type = {
+	"source_location",
+	/* no marking, no freeing */
+	{ 0, 0, src_loc_memsize, /* reserved */ },
+	/* parent, data, [ flags ] */
+};
+
+static VALUE cSrcLoc;
+
+static int
+extract_addr(const char *str, size_t len, void **p)
+{
+	const char *c;
+#if defined(__GLIBC__)
+	return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
+#else /* tested FreeBSD */
+	return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+#endif
+}
+
+/*
+ * call-seq:
+ *	Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Returns the associated Mwrap::SourceLocation given the +location+
+ * String.   +location+ is either a Ruby source location path:line
+ * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
+ * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
+ */
+static VALUE mwrap_aref(VALUE mod, VALUE loc)
+{
+	const char *str = StringValueCStr(loc);
+	int len = RSTRING_LENINT(loc);
+	struct src_loc *k = 0;
+	uintptr_t p;
+	struct cds_lfht_iter iter;
+	struct cds_lfht_node *cur;
+	struct cds_lfht *t;
+	struct src_loc *l;
+	VALUE val = Qnil;
+
+	if (extract_addr(str, len, (void **)&p)) {
+		k = (void *)kbuf;
+		memcpy(k->k, &p, sizeof(p));
+		k->capa = 0;
+		k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+	} else {
+		k = (void *)kbuf;
+		memcpy(k->k, str, len + 1);
+		k->capa = len + 1;
+		k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+	}
+
+	if (!k) return val;
+
+	rcu_read_lock();
+	t = rcu_dereference(totals);
+	if (!t) goto out_unlock;
+
+	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+	cur = cds_lfht_iter_get_node(&iter);
+	if (cur) {
+		l = caa_container_of(cur, struct src_loc, hnode);
+		val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
+	}
+out_unlock:
+	rcu_read_unlock();
+	return val;
+}
+
+static VALUE src_loc_each_i(VALUE p)
+{
+	struct alloc_hdr *h;
+	struct src_loc *l = (struct src_loc *)p;
+
+	cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
+		size_t gen = uatomic_read(&h->as.live.gen);
+		size_t size = uatomic_read(&h->size);
+
+		if (size) {
+			VALUE v[2];
+			v[0] = SIZET2NUM(size);
+			v[1] = SIZET2NUM(gen);
+
+			rb_yield_values2(2, v);
+		}
+	}
+
+	return Qfalse;
+}
+
+static struct src_loc *src_loc_get(VALUE self)
+{
+	struct src_loc *l;
+	TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
+	assert(l);
+	return l;
+}
+
+/*
+ * call-seq:
+ *	loc = Mwrap[location]
+ *	loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.
+ *
+ * This functionality is only available in mwrap 2.0.0+
+ */
+static VALUE src_loc_each(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+
+	assert(locating == 0 && "forgot to clear locating");
+	++locating;
+	rcu_read_lock();
+	rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+	return self;
+}
+
+/*
+ * The the mean lifespan (in GC generations) of allocations made from this
+ * location.  This does not account for live allocations.
+ */
+static VALUE src_loc_mean_lifespan(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	size_t tot, frees;
+
+	frees = uatomic_read(&l->frees);
+	tot = uatomic_read(&l->age_total);
+	return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
+}
+
+/* The number of frees made from this location */
+static VALUE src_loc_frees(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+}
+
+/* The number of allocations made from this location */
+static VALUE src_loc_allocations(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+}
+
+/* The total number of bytes allocated from this location */
+static VALUE src_loc_total(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+}
+
+/*
+ * The maximum age (in GC generations) of an allocation before it was freed.
+ * This does not account for live allocations.
+ */
+static VALUE src_loc_max_lifespan(VALUE self)
+{
+	return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+}
+
+/*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+static VALUE src_loc_name(VALUE self)
+{
+	struct src_loc *l = src_loc_get(self);
+	VALUE ret;
+
+	++locating;
+	ret = location_string(l);
+	--locating;
+	return ret;
+}
+
+static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
+
+/*
+ * call-seq:
+ *
+ *	Mwrap.quiet do |depth|
+ *	  # expensive sort/calculate/emitting results of Mwrap.each
+ *	  # affecting statistics of the rest of the app
+ *	end
+ *
+ * Stops allocation tracking inside the block.  This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.
+ *
+ * This feature was added in mwrap 2.0.0+
+ */
+static VALUE mwrap_quiet(VALUE mod)
+{
+	size_t cur = ++locating;
+	return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
 }
 
 /*
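A combined sketch of the APIs this hunk introduces — Mwrap[], SourceLocation#each, and Mwrap.quiet (hedged; the location string is the example from the call-seq comment):

	require 'mwrap'
	if (loc = Mwrap['/path/to/foo.rb:5'])   # string as printed by Mwrap.dump
	  Mwrap.quiet do                        # pause tracking while reporting
	    loc.each do |size, generation|      # live allocations only
	      puts "#{size} bytes allocated when GC.count was #{generation}"
	    end
	    puts "#{loc.allocations} allocs / #{loc.frees} frees at #{loc.name}"
	    puts "mean lifespan: #{loc.mean_lifespan} GC generations"
	  end
	end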
@@ -515,19 +1051,47 @@ static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
 * * dump_fd: a writable FD to dump to
 * * dump_path: a path to dump to, the file is opened in O_APPEND mode
 * * dump_min: the minimum allocation size (total) to dump
+* * memalign: use `1' to enable tracking the memalign family
 *
 * If both `dump_fd' and `dump_path' are specified, dump_path takes
 * precedence.
+*
+* Tracking the memalign family of functions is misleading for Ruby
+* applications, as heap page allocations can happen anywhere a
+* Ruby object is allocated, even in the coldest code paths.
+* Furthermore, it is rarely-used outside of the Ruby object allocator.
+* Thus tracking memalign functions is disabled by default.
 */
 void Init_mwrap(void)
 {
-	VALUE mod
+	VALUE mod;
+
+	++locating;
+	mod = rb_define_module("Mwrap");
 	id_uminus = rb_intern("-@");
 
+	/*
+	 * Represents a location in source code or library
+	 * address which calls a memory allocation.  It is
+	 * updated automatically as allocations are made, so
+	 * there is no need to reload or reread it from Mwrap#[].
+	 * This class is only available since mwrap 2.0.0+.
+	 */
+	cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
 	rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
-	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
+	rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+	rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
+	rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+	rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+	rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+	rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+	rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+	rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+	rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+	--locating;
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
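Putting the documented options together, enabling both a log file and memalign tracking would look like this (hedged; options are comma-delimited, as the destructor's `strchr(dump_path, ',')` parsing below suggests):

	MWRAP=dump_path:/path/to/log,memalign:1 mwrap RUBY_COMMAND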
@@ -538,18 +1102,18 @@ void Init_mwrap(void)
 __attribute__ ((destructor))
 static void mwrap_dump_destructor(void)
 {
-
-
-
-
-
+	const char *opt = getenv("MWRAP");
+	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+	struct dump_arg a;
+	size_t i;
+	int dump_fd;
 	char *dump_path;
 
 	if (!opt)
 		return;
 
-
-
+	++locating;
+	if ((dump_path = strstr(opt, "dump_path:")) &&
 	    (dump_path += sizeof("dump_path")) &&
 	    *dump_path) {
 		char *end = strchr(dump_path, ',');
@@ -594,5 +1158,5 @@ static void mwrap_dump_destructor(void)
 	}
 	dump_to_file(&a);
 out:
-
+	--locating;
 }