mwrap 2.3.0 → 3.0.0.pre1

@@ -0,0 +1,299 @@
+ /*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: LGPL-2.1+ <https://www.gnu.org/licenses/lgpl-2.1.txt>
+ *
+ * Unlike the rest of the project, I'm happy with this being LGPL-2.1+
+ * since the remote_free_* stuff is meant for glibc, anyway.
+ *
+ * This is a small wrapper on top of dlmalloc (dlmalloc_c.h) which
+ * adds wait-free free(3) multi-threading support to avoid contention
+ * with call_rcu.
+ *
+ * The wait-free free(3) implementation was proposed for inclusion into
+ * glibc in 2018 and may eventually become part of glibc:
+ * https://inbox.sourceware.org/libc-alpha/20180731084936.g4yw6wnvt677miti@dcvr/
+ *
+ * Arenas are thread-local and returned to a global pool upon thread
+ * destruction. This works well for processes with stable thread counts,
+ * but wastes memory in processes with unstable thread counts.
+ *
+ * On Linux with O_TMPFILE support, all allocations are backed by
+ * a temporary file (in TMPDIR). This avoids OOM errors on
+ * memory-constrained systems due to the higher-than-normal memory
+ * usage of mwrap itself.
+ *
+ * memalign-family support is ignored (and reimplemented in mwrap_core.h).
+ * dlmalloc's attempts to improve memory efficiency are prone to
+ * fragmentation if memaligned allocations are repeatedly freed and
+ * reallocated while normal mallocs are happening. The complexity and
+ * work needed to avoid it does not seem worthwhile nowadays given:
+ * 1) memalign usage isn't common
+ * 2) 64-bit systems with virtually unlimited VA space are common
+ * see https://sourceware.org/bugzilla/show_bug.cgi?id=14581
+ *
+ * realloc and calloc are also reimplemented naively in mwrap_core.h since
+ * the optimizations in dlmalloc made it harder to deal with the
+ * accounting needs of mwrap. They may be reinstated in the future.
+ */
+ #include "check.h"
+ #include "gcc.h"
+ #include <urcu/rculist.h>
+ #include <urcu/wfcqueue.h>
+ #include <urcu-bp.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <errno.h>
+
+ /* this is fine on most x86-64, especially with file-backed mmap(2) */
+ #define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
+
+ #if !defined(MWRAP_FILE_BACKED) && defined(__linux__) && defined(O_TMPFILE)
+ # define MWRAP_FILE_BACKED 1
+ #else
+ # define MWRAP_FILE_BACKED 0
+ #endif
+ #if MWRAP_FILE_BACKED
+ # include <sys/mman.h>
+ static void *my_mmap(size_t size)
+ {
+         int flags = MAP_PRIVATE;
+         const char *tmpdir = getenv("TMPDIR");
+         int fd;
+         void *ret;
+
+         if (!tmpdir)
+                 tmpdir = "/tmp";
+
+         fd = open(tmpdir, O_TMPFILE|O_RDWR, 0600);
+         if (fd < 0) {
+                 flags |= MAP_ANONYMOUS;
+         } else {
+                 int t = ftruncate(fd, size); /* sparse file */
+
+                 if (t < 0) {
+                         flags |= MAP_ANONYMOUS;
+                         close(fd);
+                         fd = -1;
+                 }
+         }
+         ret = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, fd, 0);
+         if (fd >= 0) {
+                 int err = errno;
+                 close(fd);
+                 errno = err;
+         }
+         return ret;
+ }
+ #endif /* MWRAP_FILE_BACKED */
+
+ /* knobs for dlmalloc */
+ #define FOOTERS 1 /* required for remote_free_* stuff */
+ #define USE_DL_PREFIX
+ #define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+ #define DLMALLOC_EXPORT static inline
+ /* #define NO_MALLOC_STATS 1 */
+ #define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+ #if MWRAP_FILE_BACKED
+ # define MMAP(size) my_mmap(size)
+ # define HAVE_MREMAP 0
+ #endif
+ #include "dlmalloc_c.h"
+ #undef ABORT /* conflicts with Perl */
+ #undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
+ #undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
+
+ static MWRAP_TSD mstate ms_tsd;
+
+ /* global_mtx protects arenas_active, arenas_unused, and tlskey init */
+ static pthread_mutex_t global_mtx = PTHREAD_MUTEX_INITIALIZER;
+ static pthread_key_t tlskey;
+ static CDS_LIST_HEAD(arenas_active);
+ static CDS_LIST_HEAD(arenas_unused);
+
+ /* called on pthread exit */
+ ATTR_COLD static void mstate_tsd_dtor(void *p)
+ {
+         mstate ms = p;
+
+         /*
+          * In case another destructor calls free (or any allocation
+          * function; if that happens, we leak the mstate forever).
+          */
+         ms_tsd = 0;
+
+         if (!ms)
+                 return;
+
+         CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+         cds_list_del(&ms->arena_node); /* remove from arenas_active */
+         cds_list_add(&ms->arena_node, &arenas_unused);
+         CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ }
+
+ /* see httpd.h */
+ static void h1d_atfork_prepare(void);
+ static void h1d_atfork_parent(void);
+ static void h1d_start(void);
+
+ ATTR_COLD static void atfork_prepare(void)
+ {
+         h1d_atfork_prepare();
+         call_rcu_before_fork();
+         CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+ }
+
+ ATTR_COLD static void atfork_parent(void)
+ {
+         CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+         call_rcu_after_fork_parent();
+         CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+         h1d_atfork_parent();
+         CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ }
+
+ ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */
+
+ ATTR_COLD static void atfork_child(void)
+ {
+         CHECK(int, 0, pthread_mutex_init(&global_mtx, 0));
+
+         /*
+          * We should be the only active thread at this point.
+          * Theoretically the application could register another atfork_child
+          * handler which runs before this handler AND spawns a thread which
+          * calls malloc; not much we can do about that, though.
+          */
+         cds_list_splice(&arenas_active, &arenas_unused);
+         CDS_INIT_LIST_HEAD(&arenas_active);
+         if (ms_tsd) {
+                 cds_list_del(&ms_tsd->arena_node); /* remove from unused */
+                 cds_list_add(&ms_tsd->arena_node, &arenas_active);
+         }
+         reset_mutexes();
+         call_rcu_after_fork_child();
+         h1d_start();
+ }
+
+ #if defined(__GLIBC__)
+ # define FIRST_TIME 0
+ #else /* pthread_mutex_lock calls malloc on FreeBSD */
+ static int once;
+ # define FIRST_TIME (uatomic_cmpxchg(&once, 0, 1))
+ #endif
+
+ static __attribute__((noinline)) mstate mstate_acquire_harder(void)
+ {
+         bool do_lock = FIRST_TIME ? false : true;
+         if (do_lock)
+                 CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+         if (cds_list_empty(&arenas_unused)) {
+                 ms_tsd = create_mspace(0, 0);
+                 ms_tsd->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+         } else { /* reuse existing */
+                 ms_tsd = cds_list_first_entry(&arenas_unused,
+                                               struct malloc_state, arena_node);
+                 cds_list_del(&ms_tsd->arena_node);
+         }
+
+         cds_list_add(&ms_tsd->arena_node, &arenas_active);
+         if (!tlskey)
+                 CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
+
+         if (do_lock)
+                 CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+         CHECK(int, 0, pthread_setspecific(tlskey, ms_tsd));
+         return ms_tsd;
+ }
+
+ /* process remote free requests; returns the number of allocations freed */
+ static size_t remote_free_step(mstate ms)
+ {
+         size_t nfree = 0;
+         struct cds_wfcq_node *node, *n;
+         struct __cds_wfcq_head tmp_hd;
+         struct cds_wfcq_tail tmp_tl;
+         enum cds_wfcq_ret ret;
+
+         ___cds_wfcq_init(&tmp_hd, &tmp_tl);
+         ret = __cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
+                                             &ms->remote_free_head,
+                                             &ms->remote_free_tail);
+
+         if (ret == CDS_WFCQ_RET_DEST_EMPTY) {
+                 __cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
+                         ++nfree;
+                         mspace_free(ms, node);
+                 }
+         }
+         mwrap_assert(ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+
+         return nfree;
+ }
+
+ static void remote_free_finish(mstate ms)
+ {
+         while (remote_free_step(ms)) ;
+ }
+
+ int malloc_trim(size_t pad)
+ {
+         mstate m;
+         int ret = 0;
+
+         CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+
+         /* be lazy for active sibling threads, readers are not synchronized */
+         cds_list_for_each_entry(m, &arenas_active, arena_node)
+                 uatomic_set(&m->trim_check, 0);
+
+         /* nobody is using idle arenas, clean immediately */
+         cds_list_for_each_entry(m, &arenas_unused, arena_node) {
+                 m->trim_check = 0;
+                 remote_free_finish(m);
+                 ret |= sys_trim(m, pad);
+         }
+
+         CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+
+         m = ms_tsd;
+         if (m) { /* trim our own arena immediately */
+                 remote_free_finish(m);
+                 ret |= sys_trim(m, pad);
+         }
+         return ret;
+ }
+
+ static void remote_free_enqueue(mstate fm, void *mem)
+ {
+         struct cds_wfcq_node *node = mem;
+
+         cds_wfcq_node_init(node);
+         cds_wfcq_enqueue(&fm->remote_free_head, &fm->remote_free_tail, node);
+         /* other thread calls remote_free_step */
+ }
+
+ static void *real_malloc(size_t bytes)
+ {
+         mstate ms = ms_tsd;
+         if (caa_unlikely(!ms))
+                 ms = mstate_acquire_harder();
+
+         remote_free_step(ms);
+         return mspace_malloc(ms, bytes);
+ }
+
+ static void real_free(void *mem)
+ {
+         mstate ms = ms_tsd;
+         if (mem) {
+                 mchunkptr p = mem2chunk(mem);
+                 mstate fm = get_mstate_for(p);
+                 if (fm == ms)
+                         mspace_free(fm, mem);
+                 else
+                         remote_free_enqueue(fm, mem);
+         }
+         if (ms)
+                 remote_free_step(ms);
+ }
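
The three ATTR_COLD handlers above follow pthread_atfork(3)'s prepare/parent/child protocol, but their registration is not part of this hunk. As a sketch only, the presumed setup (likely performed once at startup elsewhere, e.g. in mwrap_core.h) amounts to:

    /* run once before any fork(2) so global_mtx and RCU stay consistent */
    CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent, atfork_child));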
@@ -0,0 +1,92 @@
+ /*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ * Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #ifndef picohttpparser_h
+ #define picohttpparser_h
+
+ #include <sys/types.h>
+
+ #ifdef _MSC_VER
+ #define ssize_t intptr_t
+ #endif
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /* contains the name and value of a header (name == NULL if it is a
+ * continuing line of a multiline header) */
+ struct phr_header {
+         const char *name;
+         size_t name_len;
+         const char *value;
+         size_t value_len;
+ };
+
+ /* returns number of bytes consumed if successful, -2 if request is partial,
+ * -1 if failed */
+ static
+ int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
+                       int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+ /* ditto */
+ static inline
+ int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                        struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+ /* ditto */
+ static inline
+ int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+ /* should be zero-filled before start */
+ struct phr_chunked_decoder {
+         size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
+         char consume_trailer;       /* if trailing headers should be consumed */
+         char _hex_count;
+         char _state;
+ };
+
+ /* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
+ * encoding headers. When the function returns without an error, bufsz is
+ * updated to the length of the decoded data available. Applications should
+ * repeatedly call the function while it returns -2 (incomplete), each time
+ * supplying newly arrived data. If the end of the chunked-encoded data is
+ * found, the function returns a non-negative number indicating the number of
+ * octets left undecoded, starting at the offset indicated by `*bufsz`.
+ * Returns -1 on error.
+ */
+ static inline
+ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
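
An illustrative sketch (not part of the diff) of the calling convention described above; read_more() and consume() are hypothetical placeholders for I/O, and the decoder must start zero-filled:

    struct phr_chunked_decoder dec = {0}; /* zero-filled, as required */
    char buf[8192];
    size_t bufsz;
    ssize_t r;

    do {
            bufsz = read_more(buf, sizeof(buf)); /* newly arrived bytes only */
            r = phr_decode_chunked(&dec, buf, &bufsz);
            if (r == -1)
                    break; /* decode error */
            consume(buf, bufsz); /* buf[0..bufsz) is decoded payload */
    } while (r == -2); /* -2 == incomplete, keep feeding data */
    /* r >= 0: done; r undecoded octets remain at buf + bufsz */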
+
+ /* returns non-zero if the chunked decoder is in the middle of chunked data */
+ static inline
+ int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif
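
Since this header only declares the parser, a short usage sketch may help; it is illustrative rather than part of the diff. It assumes `sock` is a readable socket descriptor and follows the convention documented above: `num_headers` is in/out, and `last_len` is the buffer length from the previous partial attempt (0 on the first call) so already-scanned bytes are skipped:

    #include <unistd.h> /* read(2) */

    struct phr_header headers[16];
    char buf[4096];
    size_t buflen = 0, prevbuflen, num_headers;
    const char *method, *path;
    size_t method_len, path_len;
    int minor_version, ret = -2;

    while (ret == -2) { /* -2 == request is partial */
            ssize_t r = read(sock, buf + buflen, sizeof(buf) - buflen);
            if (r <= 0)
                    break; /* I/O error or EOF */
            prevbuflen = buflen;
            buflen += r;
            num_headers = sizeof(headers) / sizeof(headers[0]);
            ret = phr_parse_request(buf, buflen, &method, &method_len,
                                    &path, &path_len, &minor_version,
                                    headers, &num_headers, prevbuflen);
    }
    /* ret > 0: success, ret bytes consumed; ret == -1: parse error */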