mwrap 1.0.0
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/COPYING +339 -0
- data/MANIFEST +12 -0
- data/README +86 -0
- data/Rakefile +16 -0
- data/bin/mwrap +29 -0
- data/ext/mwrap/extconf.rb +13 -0
- data/ext/mwrap/jhash.h +256 -0
- data/ext/mwrap/mwrap.c +598 -0
- data/mwrap.gemspec +30 -0
- data/test/test_mwrap.rb +149 -0
- metadata +87 -0
data/Rakefile ADDED
@@ -0,0 +1,16 @@
+# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+require 'rake/testtask'
+begin
+  require 'rake/extensiontask'
+  Rake::ExtensionTask.new('mwrap')
+rescue LoadError
+  warn 'rake-compiler not available, cross compiling disabled'
+end
+
+Rake::TestTask.new(:test)
+task :test => :compile
+task :default => :compile
+
+c_files = File.readlines('MANIFEST').grep(%r{ext/.*\.[ch]$}).map!(&:chomp!)
+task 'compile:mwrap' => c_files
data/bin/mwrap ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/ruby
+# frozen_string_literal: true
+# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+require 'mwrap'
+mwrap_so = $".grep(%r{/mwrap\.so\z})[0] or abort "mwrap.so not loaded"
+cur = ENV['LD_PRELOAD']
+if cur
+  cur = cur.split(/[:\s]+/)
+  if !cur.include?(mwrap_so)
+    # drop old versions
+    cur.delete_if { |path| path.end_with?('/mwrap.so') }
+    cur.unshift(mwrap_so)
+    ENV['LD_PRELOAD'] = cur.join(':')
+  end
+else
+  ENV['LD_PRELOAD'] = mwrap_so
+end
+
+# work around close-on-exec by default behavior in Ruby:
+opts = {}
+if ENV['MWRAP'] =~ /dump_fd:(\d+)/
+  dump_fd = $1.to_i
+  if dump_fd > 2
+    dump_io = IO.new(dump_fd)
+    opts[dump_fd] = dump_io
+  end
+end
+exec *ARGV, opts

data/ext/mwrap/extconf.rb ADDED
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+require 'mkmf'
+
+have_func 'mempcpy'
+have_library 'urcu-cds' or abort 'userspace RCU not installed'
+have_header 'urcu/rculfhash.h' or abort 'rculfhash.h not found'
+have_library 'urcu-bp' or abort 'liburcu-bp not found'
+have_library 'dl'
+have_library 'c'
+have_library 'execinfo' # FreeBSD
+create_makefile 'mwrap'

data/ext/mwrap/jhash.h ADDED
@@ -0,0 +1,256 @@
+#ifndef _JHASH_H
+#define _JHASH_H
+
+/*
+ * jhash.h
+ *
+ * Example hash function.
+ *
+ * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+
+/*
+ * Hash function
+ * Source: http://burtleburtle.net/bob/c/lookup3.c
+ * Originally Public Domain
+ */
+
+#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+#define mix(a, b, c) \
+do { \
+        a -= c; a ^= rot(c, 4); c += b; \
+        b -= a; b ^= rot(a, 6); a += c; \
+        c -= b; c ^= rot(b, 8); b += a; \
+        a -= c; a ^= rot(c, 16); c += b; \
+        b -= a; b ^= rot(a, 19); a += c; \
+        c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define final(a, b, c) \
+{ \
+        c ^= b; c -= rot(b, 14); \
+        a ^= c; a -= rot(c, 11); \
+        b ^= a; b -= rot(a, 25); \
+        c ^= b; c -= rot(b, 16); \
+        a ^= c; a -= rot(c, 4); \
+        b ^= a; b -= rot(a, 14); \
+        c ^= b; c -= rot(b, 24); \
+}
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define HASH_LITTLE_ENDIAN 1
+#else
+#define HASH_LITTLE_ENDIAN 0
+#endif
+
+/*
+ *
+ * hashlittle() -- hash a variable-length key into a 32-bit value
+ * k : the key (the unaligned variable-length array of bytes)
+ * length : the length of the key, counting by bytes
+ * initval : can be any 4-byte value
+ * Returns a 32-bit value. Every bit of the key affects every bit of
+ * the return value. Two keys differing by one or two bits will have
+ * totally different hash values.
+ *
+ * The best hash table sizes are powers of 2. There is no need to do
+ * mod a prime (mod is sooo slow!). If you need less than 32 bits,
+ * use a bitmask. For example, if you need only 10 bits, do
+ * h = (h & hashmask(10));
+ * In which case, the hash table should have hashsize(10) elements.
+ *
+ * If you are hashing n strings (uint8_t **)k, do it like this:
+ * for (i = 0, h = 0; i < n; ++i) h = hashlittle(k[i], len[i], h);
+ *
+ * By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+ * code any way you wish, private, educational, or commercial. It's free.
+ *
+ * Use for hash table lookup, or anything where one collision in 2^^32 is
+ * acceptable. Do NOT use for cryptographic purposes.
+ */
+static
+uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
+{
+        uint32_t a, b, c; /* internal state */
+        union {
+                const void *ptr;
+                size_t i;
+        } u;
+
+        /* Set up the internal state */
+        a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+        u.ptr = key;
+        if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+                const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
+
+                /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+                while (length > 12) {
+                        a += k[0];
+                        b += k[1];
+                        c += k[2];
+                        mix(a, b, c);
+                        length -= 12;
+                        k += 3;
+                }
+
+                /*----------------------------- handle the last (probably partial) block */
+                /*
+                 * "k[2]&0xffffff" actually reads beyond the end of the string, but
+                 * then masks off the part it's not allowed to read. Because the
+                 * string is aligned, the masked-off tail is in the same word as the
+                 * rest of the string. Every machine with memory protection I've seen
+                 * does it on word boundaries, so is OK with this. But VALGRIND will
+                 * still catch it and complain. The masking trick does make the hash
+                 * noticably faster for short strings (like English words).
+                 */
+#ifndef VALGRIND
+
+                switch (length) {
+                case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+                case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+                case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+                case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+                case 8 : b+=k[1]; a+=k[0]; break;
+                case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+                case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+                case 5 : b+=k[1]&0xff; a+=k[0]; break;
+                case 4 : a+=k[0]; break;
+                case 3 : a+=k[0]&0xffffff; break;
+                case 2 : a+=k[0]&0xffff; break;
+                case 1 : a+=k[0]&0xff; break;
+                case 0 : return c; /* zero length strings require no mixing */
+                }
+
+#else /* make valgrind happy */
+                {
+                        const uint8_t *k8;
+
+                        k8 = (const uint8_t *) k;
+                        switch (length) {
+                        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+                        case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
+                        case 10: c+=((uint32_t) k8[9])<<8; /* fall through */
+                        case 9 : c+=k8[8]; /* fall through */
+                        case 8 : b+=k[1]; a+=k[0]; break;
+                        case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
+                        case 6 : b+=((uint32_t) k8[5])<<8; /* fall through */
+                        case 5 : b+=k8[4]; /* fall through */
+                        case 4 : a+=k[0]; break;
+                        case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
+                        case 2 : a+=((uint32_t) k8[1])<<8; /* fall through */
+                        case 1 : a+=k8[0]; break;
+                        case 0 : return c;
+                        }
+                }
+#endif /* !valgrind */
+
+        } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+                const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
+                const uint8_t *k8;
+
+                /*--------------- all but last block: aligned reads and different mixing */
+                while (length > 12)
+                {
+                        a += k[0] + (((uint32_t) k[1])<<16);
+                        b += k[2] + (((uint32_t) k[3])<<16);
+                        c += k[4] + (((uint32_t) k[5])<<16);
+                        mix(a, b, c);
+                        length -= 12;
+                        k += 6;
+                }
+
+                /*----------------------------- handle the last (probably partial) block */
+                k8 = (const uint8_t *) k;
+                switch(length)
+                {
+                case 12: c+=k[4]+(((uint32_t) k[5])<<16);
+                         b+=k[2]+(((uint32_t) k[3])<<16);
+                         a+=k[0]+(((uint32_t) k[1])<<16);
+                         break;
+                case 11: c+=((uint32_t) k8[10])<<16; /* fall through */
+                case 10: c+=k[4];
+                         b+=k[2]+(((uint32_t) k[3])<<16);
+                         a+=k[0]+(((uint32_t) k[1])<<16);
+                         break;
+                case 9 : c+=k8[8]; /* fall through */
+                case 8 : b+=k[2]+(((uint32_t) k[3])<<16);
+                         a+=k[0]+(((uint32_t) k[1])<<16);
+                         break;
+                case 7 : b+=((uint32_t) k8[6])<<16; /* fall through */
+                case 6 : b+=k[2];
+                         a+=k[0]+(((uint32_t) k[1])<<16);
+                         break;
+                case 5 : b+=k8[4]; /* fall through */
+                case 4 : a+=k[0]+(((uint32_t) k[1])<<16);
+                         break;
+                case 3 : a+=((uint32_t) k8[2])<<16; /* fall through */
+                case 2 : a+=k[0];
+                         break;
+                case 1 : a+=k8[0];
+                         break;
+                case 0 : return c; /* zero length requires no mixing */
+                }
+
+        } else { /* need to read the key one byte at a time */
+                const uint8_t *k = (const uint8_t *)key;
+
+                /*--------------- all but the last block: affect some 32 bits of (a, b, c) */
+                while (length > 12) {
+                        a += k[0];
+                        a += ((uint32_t) k[1])<<8;
+                        a += ((uint32_t) k[2])<<16;
+                        a += ((uint32_t) k[3])<<24;
+                        b += k[4];
+                        b += ((uint32_t) k[5])<<8;
+                        b += ((uint32_t) k[6])<<16;
+                        b += ((uint32_t) k[7])<<24;
+                        c += k[8];
+                        c += ((uint32_t) k[9])<<8;
+                        c += ((uint32_t) k[10])<<16;
+                        c += ((uint32_t) k[11])<<24;
+                        mix(a,b,c);
+                        length -= 12;
+                        k += 12;
+                }
+
+                /*-------------------------------- last block: affect all 32 bits of (c) */
+                switch (length) { /* all the case statements fall through */
+                case 12: c+=((uint32_t) k[11])<<24;
+                case 11: c+=((uint32_t) k[10])<<16;
+                case 10: c+=((uint32_t) k[9])<<8;
+                case 9 : c+=k[8];
+                case 8 : b+=((uint32_t) k[7])<<24;
+                case 7 : b+=((uint32_t) k[6])<<16;
+                case 6 : b+=((uint32_t) k[5])<<8;
+                case 5 : b+=k[4];
+                case 4 : a+=((uint32_t) k[3])<<24;
+                case 3 : a+=((uint32_t) k[2])<<16;
+                case 2 : a+=((uint32_t) k[1])<<8;
+                case 1 : a+=k[0];
+                         break;
+                case 0 : return c;
+                }
+        }
+
+        final(a, b, c);
+        return c;
+}
+
+static inline
+uint32_t jhash(const void *key, size_t length, uint32_t seed)
+{
+        return hashlittle(key, length, seed);
+}
+
+#endif /* _JHASH_H */

data/ext/mwrap/mwrap.c ADDED
@@ -0,0 +1,598 @@
+/*
+ * Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ */
+#define _LGPL_SOURCE /* allows URCU to inline some stuff */
+#include <ruby/ruby.h>
+#include <ruby/thread.h>
+#include <ruby/io.h>
+#include <execinfo.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <urcu-bp.h>
+#include <urcu/rculfhash.h>
+#include "jhash.h"
+
+static ID id_uminus;
+const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
+static int *(*has_gvl_p)(void);
+#ifdef __FreeBSD__
+void *__malloc(size_t);
+void *__calloc(size_t, size_t);
+void *__realloc(void *, size_t);
+static void *(*real_malloc)(size_t) = __malloc;
+static void *(*real_calloc)(size_t, size_t) = __calloc;
+static void *(*real_realloc)(void *, size_t) = __realloc;
+# define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+#else
+static int ready;
+static void *(*real_malloc)(size_t);
+static void *(*real_calloc)(size_t, size_t);
+static void *(*real_realloc)(void *, size_t);
+
+/*
+ * we need to fake an OOM condition while dlsym is running,
+ * as that calls calloc under glibc, but we don't have the
+ * symbol for the jemalloc calloc, yet
+ */
+# define RETURN_IF_NOT_READY() do { \
+        if (!ready) { \
+                errno = ENOMEM; \
+                return NULL; \
+        } \
+} while (0)
+
+#endif /* !FreeBSD */
+
+/*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
+static void **ec_loc;
+
+static struct cds_lfht *totals;
+
+static struct cds_lfht *
+lfht_new(void)
+{
+        return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+}
+
+__attribute__((constructor)) static void resolve_malloc(void)
+{
+        int err;
+
+#ifndef __FreeBSD__
+        real_malloc = dlsym(RTLD_NEXT, "malloc");
+        real_calloc = dlsym(RTLD_NEXT, "calloc");
+        real_realloc = dlsym(RTLD_NEXT, "realloc");
+        if (!real_calloc || !real_malloc || !real_realloc) {
+                fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
+                        real_calloc, real_malloc, real_realloc);
+                _exit(1);
+        }
+        ready = 1;
+#endif
+
+        totals = lfht_new();
+        if (!totals)
+                fprintf(stderr, "failed to allocate totals table\n");
+
+        err = pthread_atfork(call_rcu_before_fork,
+                             call_rcu_after_fork_parent,
+                             call_rcu_after_fork_child);
+        if (err)
+                fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+
+        has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p");
+
+        /*
+         * resolve dynamically so it doesn't break when LD_PRELOAD-ed
+         * into non-Ruby binaries
+         */
+        ec_loc = dlsym(RTLD_DEFAULT, "ruby_current_execution_context_ptr");
+}
+
+#ifndef HAVE_MEMPCPY
+static void *
+my_mempcpy(void *dest, const void *src, size_t n)
+{
+        return (char *)memcpy(dest, src, n) + n;
+}
+#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
+#endif
+
+/* stolen from glibc: */
+#define RETURN_ADDRESS(nr) \
+        (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
+
+static __thread size_t locating;
+
+#define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
+static char *int2str(int num, char *dst, size_t * size)
+{
+        if (num <= 9) {
+                *size -= 1;
+                *dst++ = (char)(num + '0');
+                return dst;
+        } else {
+                char buf[INT2STR_MAX];
+                char *end = buf + sizeof(buf);
+                char *p = end;
+                size_t adj;
+
+                do {
+                        *size -= 1;
+                        *--p = (char)((num % 10) + '0');
+                        num /= 10;
+                } while (num && *size);
+
+                if (!num) {
+                        adj = end - p;
+                        return mempcpy(dst, p, adj);
+                }
+        }
+        return NULL;
+}
+
+static int has_ec_p(void)
+{
+        return (ec_loc && *ec_loc);
+}
+
+struct src_loc {
+        struct rcu_head rcu_head;
+        size_t calls;
+        size_t total;
+        struct cds_lfht_node hnode;
+        uint32_t hval;
+        uint32_t capa;
+        char k[];
+};
+
+static int loc_is_addr(const struct src_loc *l)
+{
+        return l->capa == 0;
+}
+
+static size_t loc_size(const struct src_loc *l)
+{
+        return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+}
+
+static int loc_eq(struct cds_lfht_node *node, const void *key)
+{
+        const struct src_loc *existing;
+        const struct src_loc *k = key;
+
+        existing = caa_container_of(node, struct src_loc, hnode);
+
+        return (k->hval == existing->hval &&
+                k->capa == existing->capa &&
+                memcmp(k->k, existing->k, loc_size(k)) == 0);
+}
+
+static void totals_add(struct src_loc *k)
+{
+        struct cds_lfht_iter iter;
+        struct cds_lfht_node *cur;
+        struct src_loc *l;
+        struct cds_lfht *t;
+
+
+again:
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        if (!t) goto out_unlock;
+        cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+        cur = cds_lfht_iter_get_node(&iter);
+        if (cur) {
+                l = caa_container_of(cur, struct src_loc, hnode);
+                uatomic_add(&l->total, k->total);
+                uatomic_add(&l->calls, 1);
+        } else {
+                size_t n = loc_size(k);
+                l = malloc(sizeof(*l) + n);
+                if (!l) goto out_unlock;
+
+                memcpy(l, k, sizeof(*l) + n);
+                l->calls = 1;
+                cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+                if (cur != &l->hnode) { /* lost race */
+                        rcu_read_unlock();
+                        free(l);
+                        goto again;
+                }
+        }
+out_unlock:
+        rcu_read_unlock();
+}
+
+static void update_stats(size_t size, uintptr_t caller)
+{
+        struct src_loc *k;
+        static const size_t xlen = sizeof(caller);
+        char *dst;
+
+        if (locating++) goto out; /* do not recurse into another *alloc */
+
+        if (has_gvl_p && has_gvl_p() && has_ec_p()) {
+                int line;
+                const char *ptr = rb_source_location_cstr(&line);
+                size_t len;
+                size_t int_size = INT2STR_MAX;
+
+                if (!ptr) goto unknown;
+
+                /* avoid vsnprintf or anything which could call malloc here: */
+                len = strlen(ptr);
+                k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+                k->total = size;
+                dst = mempcpy(k->k, ptr, len);
+                *dst++ = ':';
+                dst = int2str(line, dst, &int_size);
+                if (dst) {
+                        *dst = 0; /* terminate string */
+                        k->capa = (uint32_t)(dst - k->k + 1);
+                        k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+                        totals_add(k);
+                } else {
+                        rb_bug("bad math making key from location %s:%d\n",
+                               ptr, line);
+                }
+        } else {
+unknown:
+                k = alloca(sizeof(*k) + xlen);
+                k->total = size;
+                memcpy(k->k, &caller, xlen);
+                k->capa = 0;
+                k->hval = jhash(k->k, xlen, 0xdeadbeef);
+                totals_add(k);
+        }
+out:
+        --locating;
+}
+
+/*
+ * Do we care for *memalign? ruby/gc.c uses it in ways this lib
+ * doesn't care about, but maybe some gems use it, too.
+ */
+void *malloc(size_t size)
+{
+        RETURN_IF_NOT_READY();
+        update_stats(size, RETURN_ADDRESS(0));
+        return real_malloc(size);
+}
+
+void *calloc(size_t nmemb, size_t size)
+{
+        RETURN_IF_NOT_READY();
+        /* ruby_xcalloc already does overflow checking */
+        update_stats(nmemb * size, RETURN_ADDRESS(0));
+        return real_calloc(nmemb, size);
+}
+
+void *realloc(void *ptr, size_t size)
+{
+        RETURN_IF_NOT_READY();
+        update_stats(size, RETURN_ADDRESS(0));
+        return real_realloc(ptr, size);
+}
+
+struct dump_arg {
+        FILE *fp;
+        size_t min;
+};
+
+static void *dump_to_file(void *x)
+{
+        struct dump_arg *a = x;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+        struct cds_lfht *t;
+
+        ++locating;
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        if (!t)
+                goto out_unlock;
+        cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                const void *p = l->k;
+                char **s = 0;
+                if (l->total <= a->min) continue;
+
+                if (loc_is_addr(l)) {
+                        s = backtrace_symbols(p, 1);
+                        p = s[0];
+                }
+                fprintf(a->fp, "%16zu %12zu %s\n",
+                        l->total, l->calls, (const char *)p);
+                if (s) free(s);
+        }
+out_unlock:
+        rcu_read_unlock();
+        --locating;
+        return 0;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.dump([io [, min]]) -> nil
+ *
+ * Dumps the current totals to +io+ which must be an IO object
+ * (StringIO and similar are not supported). Total sizes smaller
+ * than or equal to +min+ are skipped.
+ *
+ * The output is space-delimited by 3 columns:
+ *
+ * total_size call_count location
+ */
+static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
+{
+        VALUE io, min;
+        struct dump_arg a;
+        rb_io_t *fptr;
+
+        rb_scan_args(argc, argv, "02", &io, &min);
+
+        if (NIL_P(io))
+                /* library may be linked w/o Ruby */
+                io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
+
+        a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+        io = rb_io_get_write_io(io);
+        GetOpenFile(io, fptr);
+        a.fp = rb_io_stdio_file(fptr);
+
+        rb_thread_call_without_gvl(dump_to_file, &a, 0, 0);
+        RB_GC_GUARD(io);
+        return Qnil;
+}
+
+static void
+free_src_loc(struct rcu_head *head)
+{
+        struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
+        free(l);
+}
+
+static void *totals_clear(void *ign)
+{
+        struct cds_lfht *new, *old;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        new = lfht_new();
+        rcu_read_lock();
+        old = rcu_dereference(totals);
+        rcu_assign_pointer(totals, new);
+        cds_lfht_for_each_entry(old, &iter, l, hnode) {
+                cds_lfht_del(old, &l->hnode);
+                call_rcu(&l->rcu_head, free_src_loc);
+        }
+        rcu_read_unlock();
+
+        synchronize_rcu(); /* ensure totals points to new */
+        cds_lfht_destroy(old, NULL);
+        return 0;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.clear -> nil
+ *
+ * Atomically replaces the totals table and destroys the old one.
+ * This resets all statistics. It is more expensive than `Mwrap.reset'
+ * as new allocations will need to be made to repopulate the new table.
+ */
+static VALUE mwrap_clear(VALUE mod)
+{
+        rb_thread_call_without_gvl(totals_clear, 0, 0, 0);
+        return Qnil;
+}
+
+static void *totals_reset(void *ign)
+{
+        struct cds_lfht *t;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                uatomic_set(&l->total, 0);
+                uatomic_set(&l->calls, 0);
+        }
+        rcu_read_unlock();
+        return 0;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.reset -> nil
+ *
+ * Resets the totals table by zero-ing all counters.
+ * This resets all statistics and is less costly than `Mwrap.clear'
+ * but is not an atomic operation.
+ */
+static VALUE mwrap_reset(VALUE mod)
+{
+        rb_thread_call_without_gvl(totals_reset, 0, 0, 0);
+        return Qnil;
+}
+
+static VALUE dump_ensure(VALUE ignored)
+{
+        rcu_read_unlock();
+        --locating;
+        return Qfalse;
+}
+
+static VALUE dump_each_rcu(VALUE x)
+{
+        struct dump_arg *a = (struct dump_arg *)x;
+        struct cds_lfht *t;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        t = rcu_dereference(totals);
+        cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                VALUE v[3];
+                if (l->total <= a->min) continue;
+
+                if (loc_is_addr(l)) {
+                        char **s = backtrace_symbols((void *)l->k, 1);
+                        v[1] = rb_str_new_cstr(s[0]);
+                        free(s);
+                }
+                else {
+                        v[1] = rb_str_new(l->k, l->capa - 1);
+                }
+
+                /* deduplicate and try to free up some memory */
+                v[0] = rb_funcall(v[1], id_uminus, 0);
+                if (!OBJ_FROZEN_RAW(v[1]))
+                        rb_str_resize(v[1], 0);
+
+                v[1] = SIZET2NUM(l->total);
+                v[2] = SIZET2NUM(l->calls);
+
+                rb_yield_values2(3, v);
+                assert(rcu_read_ongoing());
+        }
+        return Qnil;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.each([min]) { |location,total_bytes,call_count| ... }
+ *
+ * Yields each entry of the table to a caller-supplied block.
+ * +min+ may be specified to filter out lines with +total_bytes+
+ * equal-to-or-smaller-than the supplied minimum.
+ */
+static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
+{
+        VALUE min;
+        struct dump_arg a;
+
+        rb_scan_args(argc, argv, "01", &min);
+        a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+
+        ++locating;
+        rcu_read_lock();
+
+        return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+}
+
+/*
+ * Document-module: Mwrap
+ *
+ *        require 'mwrap'
+ *
+ * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD
+ * wrapper. As a Ruby C extension, it exposes a limited Ruby API.
+ * To be effective at gathering statistics, mwrap must be loaded as an
+ * LD_PRELOAD (using the mwrap(1) executable makes it easy).
+ *
+ * ENVIRONMENT
+ *
+ * The "MWRAP" environment variable contains a comma-delimited list
+ * of key:value options for automatically dumping at program exit.
+ *
+ * * dump_fd: a writable FD to dump to
+ * * dump_path: a path to dump to; the file is opened in O_APPEND mode
+ * * dump_min: the minimum allocation size (total) to dump
+ *
+ * If both `dump_fd' and `dump_path' are specified, dump_path takes
+ * precedence.
+ */
+void Init_mwrap(void)
+{
+        VALUE mod = rb_define_module("Mwrap");
+        id_uminus = rb_intern("-@");
+
+        rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
+        rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
+        rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+        rb_define_singleton_method(mod, "each", mwrap_each, -1);
+}
+
+/* rb_cloexec_open isn't usable by non-Ruby processes */
+#ifndef O_CLOEXEC
+# define O_CLOEXEC 0
+#endif
+
+__attribute__ ((destructor))
+static void mwrap_dump_destructor(void)
+{
+        const char *opt = getenv("MWRAP");
+        const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+        struct dump_arg a;
+        size_t i;
+        int dump_fd;
+        char *dump_path;
+
+        if (!opt)
+                return;
+
+        ++locating;
+        if ((dump_path = strstr(opt, "dump_path:")) &&
+                        (dump_path += sizeof("dump_path")) &&
+                        *dump_path) {
+                char *end = strchr(dump_path, ',');
+                if (end) {
+                        char *tmp = alloca(end - dump_path + 1);
+                        end = mempcpy(tmp, dump_path, end - dump_path);
+                        *end = 0;
+                        dump_path = tmp;
+                }
+                dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
+                               0666);
+                if (dump_fd < 0) {
+                        fprintf(stderr, "open %s failed: %s\n", dump_path,
+                                strerror(errno));
+                        goto out;
+                }
+        }
+        else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+                goto out;
+
+        if (!sscanf(opt, "dump_min:%zu", &a.min))
+                a.min = 0;
+
+        switch (dump_fd) {
+        case 0: goto out;
+        case 1: a.fp = stdout; break;
+        case 2: a.fp = stderr; break;
+        default:
+                if (dump_fd < 0)
+                        goto out;
+                a.fp = 0;
+
+                for (i = 0; !a.fp && i < 5; i++)
+                        a.fp = fdopen(dump_fd, modes[i]);
+
+                if (!a.fp) {
+                        fprintf(stderr, "failed to open fd=%d: %s\n",
+                                dump_fd, strerror(errno));
+                        goto out;
+                }
+                /* we'll leak some memory here, but this is a destructor */
+        }
+        dump_to_file(&a);
+out:
+        --locating;
+}