mwrap 2.3.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/COPYING +617 -282
- data/Documentation/.gitignore +2 -0
- data/Documentation/GNUmakefile +63 -0
- data/Documentation/mwrap.1 +242 -0
- data/Documentation/mwrap.pod +123 -0
- data/MANIFEST +13 -1
- data/README +25 -17
- data/Rakefile +10 -2
- data/VERSION-GEN +1 -1
- data/ext/mwrap/check.h +23 -0
- data/ext/mwrap/dlmalloc_c.h +6294 -0
- data/ext/mwrap/extconf.rb +3 -7
- data/ext/mwrap/gcc.h +13 -0
- data/ext/mwrap/httpd.h +1367 -0
- data/ext/mwrap/mwrap.c +44 -1151
- data/ext/mwrap/mwrap_core.h +1095 -0
- data/ext/mwrap/mymalloc.h +299 -0
- data/ext/mwrap/picohttpparser.h +92 -0
- data/ext/mwrap/picohttpparser_c.h +670 -0
- data/lib/mwrap/version.rb +1 -1
- data/lib/mwrap_rack.rb +14 -58
- data/mwrap.gemspec +10 -3
- data/t/httpd.t +191 -0
- data/t/test_common.perl +54 -0
- data/test/test_mwrap.rb +34 -50
- metadata +21 -7
--- /dev/null
+++ b/data/ext/mwrap/mymalloc.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: LGPL-2.1+ <https://www.gnu.org/licenses/lgpl-2.1.txt>
+ *
+ * Unlike the rest of the project, I'm happy with this being LGPL-2.1+
+ * since the remote_free_* stuff is meant for glibc, anyways.
+ *
+ * This is a small wrapper on top of dlmalloc (dlmalloc_c.h) which
+ * adds wait-free free(3) multi-threading support to avoid contention
+ * with call_rcu.
+
+ * The wait-free free(3) implementation was proposed for inclusion into
+   glibc in 2018 and may eventually become part of glibc:
+   https://inbox.sourceware.org/libc-alpha/20180731084936.g4yw6wnvt677miti@dcvr/
+
+ * Arenas are thread-local and returned to a global pool upon thread
+   destruction. This works well for processes with stable thread counts,
+   but wastes memory in processes with unstable thread counts.
+
+ * On Linux with O_TMPFILE support, all allocations are backed by
+   a temporary file (in TMPDIR). This avoids OOM errors on
+   memory-constrained systems due to the higher-than-normal memory
+   usage of mwrap itself.
+
+ * memalign-family support is ignored (and reimplemented in mwrap_core.h).
+   dlmalloc's attempts to improve memory-efficiency are prone to fragmentation
+   if memaligned-allocations are repeatedly freed and reallocated while
+   normal mallocs are happening. The complexity and work needed to
+   avoid it does not seem worthwhile nowadays given:
+   1) memalign usage isn't common
+   2) 64-bit systems with virtually unlimited VA space are common
+   see https://sourceware.org/bugzilla/show_bug.cgi?id=14581
+
+ * realloc and calloc are also reimplemented naively in mwrap_core.h since
+   the optimizations in dlmalloc made it harder to deal with accounting needs
+   of mwrap. They may be reinstated in the future.
+ */
+#include "check.h"
+#include "gcc.h"
+#include <urcu/rculist.h>
+#include <urcu/wfcqueue.h>
+#include <urcu-bp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/* this is fine on most x86-64, especially with file-backed mmap(2) */
+#define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
+
+#if !defined(MWRAP_FILE_BACKED) && defined(__linux__) && defined(O_TMPFILE)
+# define MWRAP_FILE_BACKED 1
+#else
+# define MWRAP_FILE_BACKED 0
+#endif
+#if MWRAP_FILE_BACKED
+# include <sys/mman.h>
+static void *my_mmap(size_t size)
+{
+        int flags = MAP_PRIVATE;
+        const char *tmpdir = getenv("TMPDIR");
+        int fd;
+        void *ret;
+
+        if (!tmpdir)
+                tmpdir = "/tmp";
+
+        fd = open(tmpdir, O_TMPFILE|O_RDWR|S_IRUSR|S_IWUSR, 0600);
+        if (fd < 0) {
+                flags |= MAP_ANONYMOUS;
+        } else {
+                int t = ftruncate(fd, size); /* sparse file */
+
+                if (t < 0) {
+                        flags |= MAP_ANONYMOUS;
+                        close(fd);
+                        fd = -1;
+                }
+        }
+        ret = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, fd, 0);
+        if (fd >= 0) {
+                int err = errno;
+                close(fd);
+                errno = err;
+        }
+        return ret;
+}
+#endif /* MWRAP_FILE_BACKED */
+
+/* knobs for dlmalloc */
+#define FOOTERS 1 /* required for remote_free_* stuff */
+#define USE_DL_PREFIX
+#define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+#define DLMALLOC_EXPORT static inline
+/* #define NO_MALLOC_STATS 1 */
+#define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+#if MWRAP_FILE_BACKED
+# define MMAP(size) my_mmap(size)
+# define HAVE_MREMAP 0
+#endif
+#include "dlmalloc_c.h"
+#undef ABORT /* conflicts with Perl */
+#undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
+#undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
+
+static MWRAP_TSD mstate ms_tsd;
+
+/* global_mtx protects arenas_active, arenas_unused, and tlskey init */
+static pthread_mutex_t global_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t tlskey;
+static CDS_LIST_HEAD(arenas_active);
+static CDS_LIST_HEAD(arenas_unused);
+
+/* called on pthread exit */
+ATTR_COLD static void mstate_tsd_dtor(void *p)
+{
+        mstate ms = p;
+
+        /*
+         * In case another destructor calls free (or any allocation function,
+         * in that case we leak the mstate forever)
+         */
+        ms_tsd = 0;
+
+        if (!ms)
+                return;
+
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        cds_list_del(&ms->arena_node); /* remove from arenas_active */
+        cds_list_add(&ms->arena_node, &arenas_unused);
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+/* see httpd.h */
+static void h1d_atfork_prepare(void);
+static void h1d_atfork_parent(void);
+static void h1d_start(void);
+
+ATTR_COLD static void atfork_prepare(void)
+{
+        h1d_atfork_prepare();
+        call_rcu_before_fork();
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+}
+
+ATTR_COLD static void atfork_parent(void)
+{
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+        call_rcu_after_fork_parent();
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        h1d_atfork_parent();
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */
+
+ATTR_COLD static void atfork_child(void)
+{
+        CHECK(int, 0, pthread_mutex_init(&global_mtx, 0));
+
+        /*
+         * We should be the only active thread at this point.
+         * Theoretically the application could register another atfork_child
+         * handler which runs before this handler AND spawns a thread which
+         * calls malloc, not much we can do about that, though.
+         */
+        cds_list_splice(&arenas_active, &arenas_unused);
+        CDS_INIT_LIST_HEAD(&arenas_active);
+        if (ms_tsd) {
+                cds_list_del(&ms_tsd->arena_node); /* remove from unused */
+                cds_list_add(&ms_tsd->arena_node, &arenas_active);
+        }
+        reset_mutexes();
+        call_rcu_after_fork_child();
+        h1d_start();
+}
+
+#if defined(__GLIBC__)
+# define FIRST_TIME 0
+#else /* pthread_mutex_lock calls malloc on FreeBSD */
+static int once;
+# define FIRST_TIME (uatomic_cmpxchg(&once, 0, 1))
+#endif
+
+static __attribute__((noinline)) mstate mstate_acquire_harder(void)
+{
+        bool do_lock = FIRST_TIME ? false : true;
+        if (do_lock)
+                CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        if (cds_list_empty(&arenas_unused)) {
+                ms_tsd = create_mspace(0, 0);
+                ms_tsd->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+        } else { /* reuse existing */
+                ms_tsd = cds_list_first_entry(&arenas_unused,
+                                              struct malloc_state, arena_node);
+                cds_list_del(&ms_tsd->arena_node);
+        }
+
+        cds_list_add(&ms_tsd->arena_node, &arenas_active);
+        if (!tlskey)
+                CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
+
+        if (do_lock)
+                CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+        CHECK(int, 0, pthread_setspecific(tlskey, ms_tsd));
+        return ms_tsd;
+}
+
+/* process remote free requests, returns allocations freed */
+static size_t remote_free_step(mstate ms)
+{
+        size_t nfree = 0;
+        struct cds_wfcq_node *node, *n;
+        struct __cds_wfcq_head tmp_hd;
+        struct cds_wfcq_tail tmp_tl;
+        enum cds_wfcq_ret ret;
+
+        ___cds_wfcq_init(&tmp_hd, &tmp_tl);
+        ret = __cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
+                                            &ms->remote_free_head,
+                                            &ms->remote_free_tail);
+
+        if (ret == CDS_WFCQ_RET_DEST_EMPTY) {
+                __cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
+                        ++nfree;
+                        mspace_free(ms, node);
+                }
+        }
+        mwrap_assert(ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+
+        return nfree;
+}
+
+static void remote_free_finish(mstate ms)
+{
+        while (remote_free_step(ms)) ;
+}
+
+int malloc_trim(size_t pad)
+{
+        mstate m;
+        int ret = 0;
+
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+
+        /* be lazy for active sibling threads, readers are not synchronized */
+        cds_list_for_each_entry(m, &arenas_active, arena_node)
+                uatomic_set(&m->trim_check, 0);
+
+        /* nobody is using idle arenas, clean immediately */
+        cds_list_for_each_entry(m, &arenas_unused, arena_node) {
+                m->trim_check = 0;
+                remote_free_finish(m);
+                ret |= sys_trim(m, pad);
+        }
+
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+
+        m = ms_tsd;
+        if (m) { /* trim our own arena immediately */
+                remote_free_finish(m);
+                ret |= sys_trim(m, pad);
+        }
+        return ret;
+}
+
+static void remote_free_enqueue(mstate fm, void *mem)
+{
+        struct cds_wfcq_node *node = mem;
+
+        cds_wfcq_node_init(node);
+        cds_wfcq_enqueue(&fm->remote_free_head, &fm->remote_free_tail, node);
+        /* other thread calls remote_free_step */
+}
+
+static void *real_malloc(size_t bytes)
+{
+        mstate ms = ms_tsd;
+        if (!caa_unlikely(ms))
+                ms = mstate_acquire_harder();
+
+        remote_free_step(ms);
+        return mspace_malloc(ms, bytes);
+}
+
+static void real_free(void *mem)
+{
+        mstate ms = ms_tsd;
+        if (mem) {
+                mchunkptr p = mem2chunk(mem);
+                mstate fm = get_mstate_for(p);
+                if (fm == ms)
+                        mspace_free(fm, mem);
+                else
+                        remote_free_enqueue(fm, mem);
+        }
+        if (ms)
+                remote_free_step(ms);
+}
--- /dev/null
+++ b/data/ext/mwrap/picohttpparser.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef picohttpparser_h
+#define picohttpparser_h
+
+#include <sys/types.h>
+
+#ifdef _MSC_VER
+#define ssize_t intptr_t
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* contains name and value of a header (name == NULL if is a continuing line
+ * of a multiline header */
+struct phr_header {
+    const char *name;
+    size_t name_len;
+    const char *value;
+    size_t value_len;
+};
+
+/* returns number of bytes consumed if successful, -2 if request is partial,
+ * -1 if failed */
+static
+int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
+                      int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+static inline
+int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                       struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+static inline
+int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* should be zero-filled before start */
+struct phr_chunked_decoder {
+    size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
+    char consume_trailer;       /* if trailing headers should be consumed */
+    char _hex_count;
+    char _state;
+};
+
+/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
+ * encoding headers. When the function returns without an error, bufsz is
+ * updated to the length of the decoded data available. Applications should
+ * repeatedly call the function while it returns -2 (incomplete) every time
+ * supplying newly arrived data. If the end of the chunked-encoded data is
+ * found, the function returns a non-negative number indicating the number of
+ * octets left undecoded, that starts from the offset returned by `*bufsz`.
+ * Returns -1 on error.
+ */
+static inline
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
+
+/* returns if the chunked decoder is in middle of chunked data */
+static inline
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif