mwrap 1.0.0

--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,16 @@
+ # Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+ # License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ require 'rake/testtask'
+ begin
+   require 'rake/extensiontask'
+   Rake::ExtensionTask.new('mwrap')
+ rescue LoadError
+   warn 'rake-compiler not available, cross compiling disabled'
+ end
+
+ Rake::TestTask.new(:test)
+ task :test => :compile
+ task :default => :compile
+
+ c_files = File.readlines('MANIFEST').grep(%r{ext/.*\.[ch]$}).map!(&:chomp!)
+ task 'compile:mwrap' => c_files
--- /dev/null
+++ b/bin/mwrap
@@ -0,0 +1,29 @@
+ #!/usr/bin/ruby
+ # frozen_string_literal: true
+ # Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+ # License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ require 'mwrap'
+ mwrap_so = $".grep(%r{/mwrap\.so\z})[0] or abort "mwrap.so not loaded"
+ cur = ENV['LD_PRELOAD']
+ if cur
+   cur = cur.split(/[:\s]+/)
+   if !cur.include?(mwrap_so)
+     # drop old versions
+     cur.delete_if { |path| path.end_with?('/mwrap.so') }
+     cur.unshift(mwrap_so)
+     ENV['LD_PRELOAD'] = cur.join(':')
+   end
+ else
+   ENV['LD_PRELOAD'] = mwrap_so
+ end
+
+ # work around close-on-exec by default behavior in Ruby:
+ opts = {}
+ if ENV['MWRAP'] =~ /dump_fd:(\d+)/
+   dump_fd = $1.to_i
+   if dump_fd > 2
+     dump_io = IO.new(dump_fd)
+     opts[dump_fd] = dump_io
+   end
+ end
+ exec *ARGV, opts
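For context, the wrapper above simply re-execs its arguments with mwrap.so prepended to LD_PRELOAD. A hypothetical invocation from a shell (the MWRAP options are documented in ext/mwrap/mwrap.c below) might look like:

    MWRAP=dump_path:/tmp/mwrap.log mwrap ruby -e 'puts "hello"'

The `opts' hash passed as the final argument to Kernel#exec uses Ruby's `fd => io' redirect form; redirecting an inherited descriptor to its own IO object keeps it open across exec despite Ruby's close-on-exec default, so the mwrap.so destructor can write its dump to a descriptor opened by the parent (the dump_fd case handled above).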
--- /dev/null
+++ b/ext/mwrap/extconf.rb
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+ # Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+ # License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ require 'mkmf'
+
+ have_func 'mempcpy'
+ have_library 'urcu-cds' or abort 'userspace RCU not installed'
+ have_header 'urcu/rculfhash.h' or abort 'rculfhash.h not found'
+ have_library 'urcu-bp' or abort 'liburcu-bp not found'
+ have_library 'dl'
+ have_library 'c'
+ have_library 'execinfo' # FreeBSD
+ create_makefile 'mwrap'
--- /dev/null
+++ b/ext/mwrap/jhash.h
@@ -0,0 +1,256 @@
+ #ifndef _JHASH_H
+ #define _JHASH_H
+
+ /*
+  * jhash.h
+  *
+  * Example hash function.
+  *
+  * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+  *
+  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+  * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+  *
+  * Permission is hereby granted to use or copy this program for any
+  * purpose, provided the above notices are retained on all copies.
+  * Permission to modify the code and to distribute modified code is
+  * granted, provided the above notices are retained, and a notice that
+  * the code was modified is included with the above copyright notice.
+  */
+
+ /*
+  * Hash function
+  * Source: http://burtleburtle.net/bob/c/lookup3.c
+  * Originally Public Domain
+  */
+
+ #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+ #define mix(a, b, c) \
+ do { \
+ 	a -= c; a ^= rot(c, 4); c += b; \
+ 	b -= a; b ^= rot(a, 6); a += c; \
+ 	c -= b; c ^= rot(b, 8); b += a; \
+ 	a -= c; a ^= rot(c, 16); c += b; \
+ 	b -= a; b ^= rot(a, 19); a += c; \
+ 	c -= b; c ^= rot(b, 4); b += a; \
+ } while (0)
+
+ #define final(a, b, c) \
+ { \
+ 	c ^= b; c -= rot(b, 14); \
+ 	a ^= c; a -= rot(c, 11); \
+ 	b ^= a; b -= rot(a, 25); \
+ 	c ^= b; c -= rot(b, 16); \
+ 	a ^= c; a -= rot(c, 4); \
+ 	b ^= a; b -= rot(a, 14); \
+ 	c ^= b; c -= rot(b, 24); \
+ }
+
+ #if (BYTE_ORDER == LITTLE_ENDIAN)
+ #define HASH_LITTLE_ENDIAN 1
+ #else
+ #define HASH_LITTLE_ENDIAN 0
+ #endif
+
+ /*
+  *
+  * hashlittle() -- hash a variable-length key into a 32-bit value
+  * k : the key (the unaligned variable-length array of bytes)
+  * length : the length of the key, counting by bytes
+  * initval : can be any 4-byte value
+  * Returns a 32-bit value. Every bit of the key affects every bit of
+  * the return value. Two keys differing by one or two bits will have
+  * totally different hash values.
+  *
+  * The best hash table sizes are powers of 2. There is no need to do
+  * mod a prime (mod is sooo slow!). If you need less than 32 bits,
+  * use a bitmask. For example, if you need only 10 bits, do
+  * h = (h & hashmask(10));
+  * In which case, the hash table should have hashsize(10) elements.
+  *
+  * If you are hashing n strings (uint8_t **)k, do it like this:
+  * for (i = 0, h = 0; i < n; ++i) h = hashlittle(k[i], len[i], h);
+  *
+  * By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+  * code any way you wish, private, educational, or commercial. It's free.
+  *
+  * Use for hash table lookup, or anything where one collision in 2^^32 is
+  * acceptable. Do NOT use for cryptographic purposes.
+  */
+ static
+ uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
+ {
+ 	uint32_t a, b, c; /* internal state */
+ 	union {
+ 		const void *ptr;
+ 		size_t i;
+ 	} u;
+
+ 	/* Set up the internal state */
+ 	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ 	u.ptr = key;
+ 	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ 		const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
+
+ 		/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ 		while (length > 12) {
+ 			a += k[0];
+ 			b += k[1];
+ 			c += k[2];
+ 			mix(a, b, c);
+ 			length -= 12;
+ 			k += 3;
+ 		}
+
+ 		/*----------------------------- handle the last (probably partial) block */
+ 		/*
+ 		 * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ 		 * then masks off the part it's not allowed to read. Because the
+ 		 * string is aligned, the masked-off tail is in the same word as the
+ 		 * rest of the string. Every machine with memory protection I've seen
+ 		 * does it on word boundaries, so is OK with this. But VALGRIND will
+ 		 * still catch it and complain. The masking trick does make the hash
+ 		 * noticeably faster for short strings (like English words).
+ 		 */
+ #ifndef VALGRIND
+
+ 		switch (length) {
+ 		case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ 		case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ 		case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ 		case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ 		case 8 : b+=k[1]; a+=k[0]; break;
+ 		case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ 		case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ 		case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ 		case 4 : a+=k[0]; break;
+ 		case 3 : a+=k[0]&0xffffff; break;
+ 		case 2 : a+=k[0]&0xffff; break;
+ 		case 1 : a+=k[0]&0xff; break;
+ 		case 0 : return c; /* zero length strings require no mixing */
+ 		}
+
+ #else /* make valgrind happy */
+ 		{
+ 			const uint8_t *k8;
+
+ 			k8 = (const uint8_t *) k;
+ 			switch (length) {
+ 			case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ 			case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
+ 			case 10: c+=((uint32_t) k8[9])<<8; /* fall through */
+ 			case 9 : c+=k8[8]; /* fall through */
+ 			case 8 : b+=k[1]; a+=k[0]; break;
+ 			case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
+ 			case 6 : b+=((uint32_t) k8[5])<<8; /* fall through */
+ 			case 5 : b+=k8[4]; /* fall through */
+ 			case 4 : a+=k[0]; break;
+ 			case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
+ 			case 2 : a+=((uint32_t) k8[1])<<8; /* fall through */
+ 			case 1 : a+=k8[0]; break;
+ 			case 0 : return c;
+ 			}
+ 		}
+ #endif /* !valgrind */
+
+ 	} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ 		const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
+ 		const uint8_t *k8;
+
+ 		/*--------------- all but last block: aligned reads and different mixing */
+ 		while (length > 12) {
+ 			a += k[0] + (((uint32_t) k[1])<<16);
+ 			b += k[2] + (((uint32_t) k[3])<<16);
+ 			c += k[4] + (((uint32_t) k[5])<<16);
+ 			mix(a, b, c);
+ 			length -= 12;
+ 			k += 6;
+ 		}
+
+ 		/*----------------------------- handle the last (probably partial) block */
+ 		k8 = (const uint8_t *) k;
+ 		switch (length) {
+ 		case 12: c+=k[4]+(((uint32_t) k[5])<<16);
+ 			 b+=k[2]+(((uint32_t) k[3])<<16);
+ 			 a+=k[0]+(((uint32_t) k[1])<<16);
+ 			 break;
+ 		case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
+ 		case 10: c+=k[4];
+ 			 b+=k[2]+(((uint32_t) k[3])<<16);
+ 			 a+=k[0]+(((uint32_t) k[1])<<16);
+ 			 break;
+ 		case 9 : c+=k8[8]; /* fall through */
+ 		case 8 : b+=k[2]+(((uint32_t) k[3])<<16);
+ 			 a+=k[0]+(((uint32_t) k[1])<<16);
+ 			 break;
+ 		case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
+ 		case 6 : b+=k[2];
+ 			 a+=k[0]+(((uint32_t) k[1])<<16);
+ 			 break;
+ 		case 5 : b+=k8[4]; /* fall through */
+ 		case 4 : a+=k[0]+(((uint32_t) k[1])<<16);
+ 			 break;
+ 		case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
+ 		case 2 : a+=k[0];
+ 			 break;
+ 		case 1 : a+=k8[0];
+ 			 break;
+ 		case 0 : return c; /* zero length requires no mixing */
+ 		}
+
+ 	} else { /* need to read the key one byte at a time */
+ 		const uint8_t *k = (const uint8_t *)key;
+
+ 		/*--------------- all but the last block: affect some 32 bits of (a, b, c) */
+ 		while (length > 12) {
+ 			a += k[0];
+ 			a += ((uint32_t) k[1])<<8;
+ 			a += ((uint32_t) k[2])<<16;
+ 			a += ((uint32_t) k[3])<<24;
+ 			b += k[4];
+ 			b += ((uint32_t) k[5])<<8;
+ 			b += ((uint32_t) k[6])<<16;
+ 			b += ((uint32_t) k[7])<<24;
+ 			c += k[8];
+ 			c += ((uint32_t) k[9])<<8;
+ 			c += ((uint32_t) k[10])<<16;
+ 			c += ((uint32_t) k[11])<<24;
+ 			mix(a,b,c);
+ 			length -= 12;
+ 			k += 12;
+ 		}
+
+ 		/*-------------------------------- last block: affect all 32 bits of (c) */
+ 		switch (length) { /* all the case statements fall through */
+ 		case 12: c+=((uint32_t) k[11])<<24;
+ 		case 11: c+=((uint32_t) k[10])<<16;
+ 		case 10: c+=((uint32_t) k[9])<<8;
+ 		case 9 : c+=k[8];
+ 		case 8 : b+=((uint32_t) k[7])<<24;
+ 		case 7 : b+=((uint32_t) k[6])<<16;
+ 		case 6 : b+=((uint32_t) k[5])<<8;
+ 		case 5 : b+=k[4];
+ 		case 4 : a+=((uint32_t) k[3])<<24;
+ 		case 3 : a+=((uint32_t) k[2])<<16;
+ 		case 2 : a+=((uint32_t) k[1])<<8;
+ 		case 1 : a+=k[0];
+ 			 break;
+ 		case 0 : return c;
+ 		}
+ 	}
+
+ 	final(a, b, c);
+ 	return c;
+ }
+
+ static inline
+ uint32_t jhash(const void *key, size_t length, uint32_t seed)
+ {
+ 	return hashlittle(key, length, seed);
+ }
+
+ #endif /* _JHASH_H */
--- /dev/null
+++ b/ext/mwrap/mwrap.c
@@ -0,0 +1,598 @@
+ /*
+  * Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+  * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+  */
+ #define _LGPL_SOURCE /* allows URCU to inline some stuff */
+ #include <ruby/ruby.h>
+ #include <ruby/thread.h>
+ #include <ruby/io.h>
+ #include <execinfo.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <dlfcn.h>
+ #include <assert.h>
+ #include <errno.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <urcu-bp.h>
+ #include <urcu/rculfhash.h>
+ #include "jhash.h"
+
+ static ID id_uminus;
+ const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
+ static int (*has_gvl_p)(void);
+ #ifdef __FreeBSD__
+ void *__malloc(size_t);
+ void *__calloc(size_t, size_t);
+ void *__realloc(void *, size_t);
+ static void *(*real_malloc)(size_t) = __malloc;
+ static void *(*real_calloc)(size_t, size_t) = __calloc;
+ static void *(*real_realloc)(void *, size_t) = __realloc;
+ # define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+ #else
+ static int ready;
+ static void *(*real_malloc)(size_t);
+ static void *(*real_calloc)(size_t, size_t);
+ static void *(*real_realloc)(void *, size_t);
+
+ /*
+  * we need to fake an OOM condition while dlsym is running,
+  * as that calls calloc under glibc, but we don't have the
+  * symbol for the jemalloc calloc, yet
+  */
+ # define RETURN_IF_NOT_READY() do { \
+ 	if (!ready) { \
+ 		errno = ENOMEM; \
+ 		return NULL; \
+ 	} \
+ } while (0)
+
+ #endif /* !FreeBSD */
+
+ /*
+  * rb_source_location_cstr relies on GET_EC(), and it's possible
+  * to have a native thread but no EC during the early and late
+  * (teardown) phases of the Ruby process
+  */
+ static void **ec_loc;
+
+ static struct cds_lfht *totals;
+
+ static struct cds_lfht *
+ lfht_new(void)
+ {
+ 	return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+ }
+
+ __attribute__((constructor)) static void resolve_malloc(void)
+ {
+ 	int err;
+
+ #ifndef __FreeBSD__
+ 	real_malloc = dlsym(RTLD_NEXT, "malloc");
+ 	real_calloc = dlsym(RTLD_NEXT, "calloc");
+ 	real_realloc = dlsym(RTLD_NEXT, "realloc");
+ 	if (!real_calloc || !real_malloc || !real_realloc) {
+ 		fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
+ 			real_calloc, real_malloc, real_realloc);
+ 		_exit(1);
+ 	}
+ 	ready = 1;
+ #endif
+
+ 	totals = lfht_new();
+ 	if (!totals)
+ 		fprintf(stderr, "failed to allocate totals table\n");
+
+ 	err = pthread_atfork(call_rcu_before_fork,
+ 			     call_rcu_after_fork_parent,
+ 			     call_rcu_after_fork_child);
+ 	if (err)
+ 		fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+
+ 	has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p");
+
+ 	/*
+ 	 * resolve dynamically so it doesn't break when LD_PRELOAD-ed
+ 	 * into non-Ruby binaries
+ 	 */
+ 	ec_loc = dlsym(RTLD_DEFAULT, "ruby_current_execution_context_ptr");
+ }
+
+ #ifndef HAVE_MEMPCPY
+ static void *
+ my_mempcpy(void *dest, const void *src, size_t n)
+ {
+ 	return (char *)memcpy(dest, src, n) + n;
+ }
+ #define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
+ #endif
+
+ /* stolen from glibc: */
+ #define RETURN_ADDRESS(nr) \
+ 	(uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
+
+ static __thread size_t locating;
+
+ #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
+ static char *int2str(int num, char *dst, size_t *size)
+ {
+ 	if (num <= 9) {
+ 		*size -= 1;
+ 		*dst++ = (char)(num + '0');
+ 		return dst;
+ 	} else {
+ 		char buf[INT2STR_MAX];
+ 		char *end = buf + sizeof(buf);
+ 		char *p = end;
+ 		size_t adj;
+
+ 		do {
+ 			*size -= 1;
+ 			*--p = (char)((num % 10) + '0');
+ 			num /= 10;
+ 		} while (num && *size);
+
+ 		if (!num) {
+ 			adj = end - p;
+ 			return mempcpy(dst, p, adj);
+ 		}
+ 	}
+ 	return NULL;
+ }
+
+ static int has_ec_p(void)
+ {
+ 	return (ec_loc && *ec_loc);
+ }
+
+ struct src_loc {
+ 	struct rcu_head rcu_head;
+ 	size_t calls;
+ 	size_t total;
+ 	struct cds_lfht_node hnode;
+ 	uint32_t hval;
+ 	uint32_t capa;
+ 	char k[];
+ };
+
+ static int loc_is_addr(const struct src_loc *l)
+ {
+ 	return l->capa == 0;
+ }
+
+ static size_t loc_size(const struct src_loc *l)
+ {
+ 	return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+ }
+
+ static int loc_eq(struct cds_lfht_node *node, const void *key)
+ {
+ 	const struct src_loc *existing;
+ 	const struct src_loc *k = key;
+
+ 	existing = caa_container_of(node, struct src_loc, hnode);
+
+ 	return (k->hval == existing->hval &&
+ 		k->capa == existing->capa &&
+ 		memcmp(k->k, existing->k, loc_size(k)) == 0);
+ }
+
+ static void totals_add(struct src_loc *k)
+ {
+ 	struct cds_lfht_iter iter;
+ 	struct cds_lfht_node *cur;
+ 	struct src_loc *l;
+ 	struct cds_lfht *t;
+
+ again:
+ 	rcu_read_lock();
+ 	t = rcu_dereference(totals);
+ 	if (!t) goto out_unlock;
+ 	cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+ 	cur = cds_lfht_iter_get_node(&iter);
+ 	if (cur) {
+ 		l = caa_container_of(cur, struct src_loc, hnode);
+ 		uatomic_add(&l->total, k->total);
+ 		uatomic_add(&l->calls, 1);
+ 	} else {
+ 		size_t n = loc_size(k);
+ 		l = malloc(sizeof(*l) + n);
+ 		if (!l) goto out_unlock;
+
+ 		memcpy(l, k, sizeof(*l) + n);
+ 		l->calls = 1;
+ 		cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+ 		if (cur != &l->hnode) { /* lost race */
+ 			rcu_read_unlock();
+ 			free(l);
+ 			goto again;
+ 		}
+ 	}
+ out_unlock:
+ 	rcu_read_unlock();
+ }
+
+ static void update_stats(size_t size, uintptr_t caller)
+ {
+ 	struct src_loc *k;
+ 	static const size_t xlen = sizeof(caller);
+ 	char *dst;
+
+ 	if (locating++) goto out; /* do not recurse into another *alloc */
+
+ 	if (has_gvl_p && has_gvl_p() && has_ec_p()) {
+ 		int line;
+ 		const char *ptr = rb_source_location_cstr(&line);
+ 		size_t len;
+ 		size_t int_size = INT2STR_MAX;
+
+ 		if (!ptr) goto unknown;
+
+ 		/* avoid vsnprintf or anything which could call malloc here: */
+ 		len = strlen(ptr);
+ 		k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+ 		k->total = size;
+ 		dst = mempcpy(k->k, ptr, len);
+ 		*dst++ = ':';
+ 		dst = int2str(line, dst, &int_size);
+ 		if (dst) {
+ 			*dst = 0; /* terminate string */
+ 			k->capa = (uint32_t)(dst - k->k + 1);
+ 			k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+ 			totals_add(k);
+ 		} else {
+ 			rb_bug("bad math making key from location %s:%d\n",
+ 				ptr, line);
+ 		}
+ 	} else {
+ unknown:
+ 		k = alloca(sizeof(*k) + xlen);
+ 		k->total = size;
+ 		memcpy(k->k, &caller, xlen);
+ 		k->capa = 0;
+ 		k->hval = jhash(k->k, xlen, 0xdeadbeef);
+ 		totals_add(k);
+ 	}
+ out:
+ 	--locating;
+ }
+
+ /*
+  * Do we care for *memalign? ruby/gc.c uses it in ways this lib
+  * doesn't care about, but maybe some gems use it, too.
+  */
+ void *malloc(size_t size)
+ {
+ 	RETURN_IF_NOT_READY();
+ 	update_stats(size, RETURN_ADDRESS(0));
+ 	return real_malloc(size);
+ }
+
+ void *calloc(size_t nmemb, size_t size)
+ {
+ 	RETURN_IF_NOT_READY();
+ 	/* ruby_xcalloc already does overflow checking */
+ 	update_stats(nmemb * size, RETURN_ADDRESS(0));
+ 	return real_calloc(nmemb, size);
+ }
+
+ void *realloc(void *ptr, size_t size)
+ {
+ 	RETURN_IF_NOT_READY();
+ 	update_stats(size, RETURN_ADDRESS(0));
+ 	return real_realloc(ptr, size);
+ }
+
+ struct dump_arg {
+ 	FILE *fp;
+ 	size_t min;
+ };
+
+ static void *dump_to_file(void *x)
+ {
+ 	struct dump_arg *a = x;
+ 	struct cds_lfht_iter iter;
+ 	struct src_loc *l;
+ 	struct cds_lfht *t;
+
+ 	++locating;
+ 	rcu_read_lock();
+ 	t = rcu_dereference(totals);
+ 	if (!t)
+ 		goto out_unlock;
+ 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
+ 		const void *p = l->k;
+ 		char **s = 0;
+ 		if (l->total <= a->min) continue;
+
+ 		if (loc_is_addr(l)) {
+ 			s = backtrace_symbols(p, 1);
+ 			p = s[0];
+ 		}
+ 		fprintf(a->fp, "%16zu %12zu %s\n",
+ 			l->total, l->calls, (const char *)p);
+ 		if (s) free(s);
+ 	}
+ out_unlock:
+ 	rcu_read_unlock();
+ 	--locating;
+ 	return 0;
+ }
+
+ /*
+  * call-seq:
+  *
+  * Mwrap.dump([io [, min]]) -> nil
+  *
+  * Dumps the current totals to +io+, which must be an IO object
+  * (StringIO and similar are not supported). Total sizes smaller
+  * than or equal to +min+ are skipped.
+  *
+  * The output consists of 3 space-delimited columns:
+  *
+  * total_size call_count location
+  */
+ static VALUE mwrap_dump(int argc, VALUE *argv, VALUE mod)
+ {
+ 	VALUE io, min;
+ 	struct dump_arg a;
+ 	rb_io_t *fptr;
+
+ 	rb_scan_args(argc, argv, "02", &io, &min);
+
+ 	if (NIL_P(io))
+ 		/* library may be linked w/o Ruby */
+ 		io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
+
+ 	a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+ 	io = rb_io_get_write_io(io);
+ 	GetOpenFile(io, fptr);
+ 	a.fp = rb_io_stdio_file(fptr);
+
+ 	rb_thread_call_without_gvl(dump_to_file, &a, 0, 0);
+ 	RB_GC_GUARD(io);
+ 	return Qnil;
+ }
+
+ static void
+ free_src_loc(struct rcu_head *head)
+ {
+ 	struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
+ 	free(l);
+ }
+
+ static void *totals_clear(void *ign)
+ {
+ 	struct cds_lfht *new, *old;
+ 	struct cds_lfht_iter iter;
+ 	struct src_loc *l;
+
+ 	new = lfht_new();
+ 	rcu_read_lock();
+ 	old = rcu_dereference(totals);
+ 	rcu_assign_pointer(totals, new);
+ 	cds_lfht_for_each_entry(old, &iter, l, hnode) {
+ 		cds_lfht_del(old, &l->hnode);
+ 		call_rcu(&l->rcu_head, free_src_loc);
+ 	}
+ 	rcu_read_unlock();
+
+ 	synchronize_rcu(); /* ensure totals points to new */
+ 	cds_lfht_destroy(old, NULL);
+ 	return 0;
+ }
+
+ /*
+  * call-seq:
+  *
+  * Mwrap.clear -> nil
+  *
+  * Atomically replaces the totals table and destroys the old one.
+  * This resets all statistics. It is more expensive than `Mwrap.reset'
+  * as new allocations will need to be made to repopulate the new table.
+  */
+ static VALUE mwrap_clear(VALUE mod)
+ {
+ 	rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
+ 	return Qnil;
+ }
+
+ static void *totals_reset(void *ign)
+ {
+ 	struct cds_lfht *t;
+ 	struct cds_lfht_iter iter;
+ 	struct src_loc *l;
+
+ 	rcu_read_lock();
+ 	t = rcu_dereference(totals);
+ 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
+ 		uatomic_set(&l->total, 0);
+ 		uatomic_set(&l->calls, 0);
+ 	}
+ 	rcu_read_unlock();
+ 	return 0;
+ }
+
+ /*
+  * call-seq:
+  *
+  * Mwrap.reset -> nil
+  *
+  * Resets the totals table by zeroing all counters.
+  * This resets all statistics and is less costly than `Mwrap.clear'
+  * but is not an atomic operation.
+  */
+ static VALUE mwrap_reset(VALUE mod)
+ {
+ 	rb_thread_call_without_gvl(totals_reset, 0, 0, 0);
+ 	return Qnil;
+ }
+
+ static VALUE dump_ensure(VALUE ignored)
+ {
+ 	rcu_read_unlock();
+ 	--locating;
+ 	return Qfalse;
+ }
+
+ static VALUE dump_each_rcu(VALUE x)
+ {
+ 	struct dump_arg *a = (struct dump_arg *)x;
+ 	struct cds_lfht *t;
+ 	struct cds_lfht_iter iter;
+ 	struct src_loc *l;
+
+ 	t = rcu_dereference(totals);
+ 	cds_lfht_for_each_entry(t, &iter, l, hnode) {
+ 		VALUE v[3];
+ 		if (l->total <= a->min) continue;
+
+ 		if (loc_is_addr(l)) {
+ 			char **s = backtrace_symbols((void *)l->k, 1);
+ 			v[1] = rb_str_new_cstr(s[0]);
+ 			free(s);
+ 		} else {
+ 			v[1] = rb_str_new(l->k, l->capa - 1);
+ 		}
+
+ 		/* deduplicate and try to free up some memory */
+ 		v[0] = rb_funcall(v[1], id_uminus, 0);
+ 		if (!OBJ_FROZEN_RAW(v[1]))
+ 			rb_str_resize(v[1], 0);
+
+ 		v[1] = SIZET2NUM(l->total);
+ 		v[2] = SIZET2NUM(l->calls);
+
+ 		rb_yield_values2(3, v);
+ 		assert(rcu_read_ongoing());
+ 	}
+ 	return Qnil;
+ }
+
+ /*
+  * call-seq:
+  *
+  * Mwrap.each([min]) { |location,total_bytes,call_count| ... }
+  *
+  * Yields each entry of the table to a caller-supplied block.
+  * +min+ may be specified to filter out lines whose +total_bytes+
+  * is less than or equal to the supplied minimum.
+  */
+ static VALUE mwrap_each(int argc, VALUE *argv, VALUE mod)
+ {
+ 	VALUE min;
+ 	struct dump_arg a;
+
+ 	rb_scan_args(argc, argv, "01", &min);
+ 	a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+
+ 	++locating;
+ 	rcu_read_lock();
+
+ 	return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+ }
+
+ /*
+  * Document-module: Mwrap
+  *
+  * require 'mwrap'
+  *
+  * Mwrap is both a Ruby C extension and an LD_PRELOAD
+  * wrapper. As a Ruby C extension, it exposes a limited Ruby API.
+  * To be effective at gathering statistics, mwrap must be loaded via
+  * LD_PRELOAD (using the mwrap(1) executable makes this easy).
+  *
+  * ENVIRONMENT
+  *
+  * The "MWRAP" environment variable contains a comma-delimited list
+  * of key:value options for automatically dumping at program exit.
+  *
+  * * dump_fd: a writable FD to dump to
+  * * dump_path: a path to dump to; the file is opened in O_APPEND mode
+  * * dump_min: the minimum allocation size (total) to dump
+  *
+  * If both `dump_fd' and `dump_path' are specified, dump_path takes
+  * precedence.
+  */
+ void Init_mwrap(void)
+ {
+ 	VALUE mod = rb_define_module("Mwrap");
+ 	id_uminus = rb_intern("-@");
+
+ 	rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
+ 	rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
+ 	rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+ 	rb_define_singleton_method(mod, "each", mwrap_each, -1);
+ }
+
+ /* rb_cloexec_open isn't usable by non-Ruby processes */
+ #ifndef O_CLOEXEC
+ # define O_CLOEXEC 0
+ #endif
+
+ __attribute__ ((destructor))
+ static void mwrap_dump_destructor(void)
+ {
+ 	const char *opt = getenv("MWRAP");
+ 	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+ 	struct dump_arg a;
+ 	size_t i;
+ 	int dump_fd;
+ 	char *dump_path;
+
+ 	if (!opt)
+ 		return;
+
+ 	++locating;
+ 	if ((dump_path = strstr(opt, "dump_path:")) &&
+ 	    (dump_path += sizeof("dump_path")) &&
+ 	    *dump_path) {
+ 		char *end = strchr(dump_path, ',');
+ 		if (end) {
+ 			char *tmp = alloca(end - dump_path + 1);
+ 			end = mempcpy(tmp, dump_path, end - dump_path);
+ 			*end = 0;
+ 			dump_path = tmp;
+ 		}
+ 		dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
+ 			       0666);
+ 		if (dump_fd < 0) {
+ 			fprintf(stderr, "open %s failed: %s\n", dump_path,
+ 				strerror(errno));
+ 			goto out;
+ 		}
+ 	}
+ 	else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+ 		goto out;
+
+ 	if (!sscanf(opt, "dump_min:%zu", &a.min))
+ 		a.min = 0;
+
+ 	switch (dump_fd) {
+ 	case 0: goto out;
+ 	case 1: a.fp = stdout; break;
+ 	case 2: a.fp = stderr; break;
+ 	default:
+ 		if (dump_fd < 0)
+ 			goto out;
+ 		a.fp = 0;
+
+ 		for (i = 0; !a.fp && i < 5; i++)
+ 			a.fp = fdopen(dump_fd, modes[i]);
+
+ 		if (!a.fp) {
+ 			fprintf(stderr, "failed to open fd=%d: %s\n",
+ 				dump_fd, strerror(errno));
+ 			goto out;
+ 		}
+ 		/* we'll leak some memory here, but this is a destructor */
+ 	}
+ 	dump_to_file(&a);
+ out:
+ 	--locating;
+ }
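As a usage sketch of the Ruby API documented above (thresholds here are illustrative; this assumes the process was started under mwrap(1) so mwrap.so is preloaded):

    require 'mwrap'

    # report call sites which allocated more than 4096 bytes in total:
    Mwrap.each(4096) do |location, total_bytes, call_count|
      warn "#{total_bytes} bytes over #{call_count} calls at #{location}"
    end

    Mwrap.dump($stderr, 4096) # same filter, formatted by the C extension
    Mwrap.reset               # zero all counters in place
    Mwrap.clear               # or: atomically swap in a fresh totals table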