mwrap 2.3.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
+ /*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: LGPL-2.1+ <https://www.gnu.org/licenses/lgpl-2.1.txt>
+ *
+ * Unlike the rest of the project, I'm happy with this being LGPL-2.1+
+ * since the remote_free_* stuff is meant for glibc, anyway.
+ *
+ * This is a small wrapper on top of dlmalloc (dlmalloc_c.h) which
+ * adds wait-free free(3) multi-threading support to avoid contention
+ * with call_rcu.
+ *
+ * The wait-free free(3) implementation was proposed for inclusion into
+ * glibc in 2018 and may eventually become part of glibc:
+ * https://inbox.sourceware.org/libc-alpha/20180731084936.g4yw6wnvt677miti@dcvr/
+ *
+ * Arenas are thread-local and returned to a global pool upon thread
+ * destruction.  This works well for processes with stable thread counts,
+ * but wastes memory in processes with unstable thread counts.
+ *
+ * On Linux with O_TMPFILE support, all allocations are backed by
+ * a temporary file (in TMPDIR).  This avoids OOM errors on
+ * memory-constrained systems due to the higher-than-normal memory
+ * usage of mwrap itself.
+ *
+ * memalign-family support is ignored (and reimplemented in mwrap_core.h).
+ * dlmalloc's attempts to improve memory efficiency are prone to fragmentation
+ * if memaligned allocations are repeatedly freed and reallocated while
+ * normal mallocs are happening.  The complexity and work needed to
+ * avoid that do not seem worthwhile nowadays, given:
+ * 1) memalign usage isn't common
+ * 2) 64-bit systems with virtually unlimited VA space are common
+ * see https://sourceware.org/bugzilla/show_bug.cgi?id=14581
+ *
+ * realloc and calloc are also reimplemented naively in mwrap_core.h since
+ * the optimizations made it harder to deal with the accounting needs
+ * of mwrap.  They may be reinstated in the future.
+ */
+ #include "check.h"
+ #include "gcc.h"
+ #include <urcu/rculist.h>
+ #include <urcu/wfcqueue.h>
+ #include <urcu-bp.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <errno.h>
+
+ /* this is fine on most x86-64, especially with file-backed mmap(2) */
+ #define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
+
+ #if !defined(MWRAP_FILE_BACKED) && defined(__linux__) && defined(O_TMPFILE)
+ # define MWRAP_FILE_BACKED 1
+ #else
+ # define MWRAP_FILE_BACKED 0
+ #endif
+ #if MWRAP_FILE_BACKED
+ # include <sys/mman.h>
+ static void *my_mmap(size_t size)
+ {
+ 	int flags = MAP_PRIVATE;
+ 	const char *tmpdir = getenv("TMPDIR");
+ 	int fd;
+ 	void *ret;
+
+ 	if (!tmpdir)
+ 		tmpdir = "/tmp";
+
+ 	fd = open(tmpdir, O_TMPFILE|O_RDWR, 0600);
+ 	if (fd < 0) {
+ 		flags |= MAP_ANONYMOUS;
+ 	} else {
+ 		int t = ftruncate(fd, size); /* sparse file */
+
+ 		if (t < 0) {
+ 			flags |= MAP_ANONYMOUS;
+ 			close(fd);
+ 			fd = -1;
+ 		}
+ 	}
+ 	ret = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, fd, 0);
+ 	if (fd >= 0) { /* preserve errno across close(2) in case mmap failed */
+ 		int err = errno;
+
+ 		close(fd);
+ 		errno = err;
+ 	}
+ 	return ret;
+ }
+ #endif /* MWRAP_FILE_BACKED */
+
+ /* knobs for dlmalloc */
+ #define FOOTERS 1 /* required for remote_free_* stuff */
+ #define USE_DL_PREFIX
+ #define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+ #define DLMALLOC_EXPORT static inline
+ /* #define NO_MALLOC_STATS 1 */
+ #define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+ #if MWRAP_FILE_BACKED
+ # define MMAP(size) my_mmap(size)
+ # define HAVE_MREMAP 0
+ #endif
+ #include "dlmalloc_c.h"
+ #undef ABORT /* conflicts with Perl */
+ #undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
+ #undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
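
For reference, the mspace API these knobs enable (ONLY_MSPACES=1) treats each arena as an independent heap, and FOOTERS=1 stamps every chunk with its owning arena so get_mstate_for() can route a free() back to it. A minimal standalone sketch of that API, separate from mwrap's thread-local wiring below:

	/* sketch only: plain dlmalloc mspace usage, no mwrap machinery */
	static void mspace_demo(void)
	{
		mspace ms = create_mspace(0, 0); /* capacity 0: grow on demand; 0: no locking */
		void *p = mspace_malloc(ms, 4096); /* served from this arena only */

		mspace_free(ms, p); /* must be freed into the owning arena */
		destroy_mspace(ms); /* returns everything the arena owns to the OS */
	}
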
+
+ static MWRAP_TSD mstate ms_tsd;
+
+ /* global_mtx protects arenas_active, arenas_unused, and tlskey init */
+ static pthread_mutex_t global_mtx = PTHREAD_MUTEX_INITIALIZER;
+ static pthread_key_t tlskey;
+ static CDS_LIST_HEAD(arenas_active);
+ static CDS_LIST_HEAD(arenas_unused);
+
+ /* called on pthread exit */
+ ATTR_COLD static void mstate_tsd_dtor(void *p)
+ {
+ 	mstate ms = p;
+
+ 	/*
+ 	 * Clear ms_tsd first in case another destructor calls free (or any
+ 	 * allocation function; in that case we leak the mstate forever).
+ 	 */
+ 	ms_tsd = 0;
+
+ 	if (!ms)
+ 		return;
+
+ 	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+ 	cds_list_del(&ms->arena_node); /* remove from arenas_active */
+ 	cds_list_add(&ms->arena_node, &arenas_unused);
+ 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ }
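
The destructor registered via pthread_key_create(3) in mstate_acquire_harder below only fires for threads that stored a non-NULL value; a minimal standalone illustration of that POSIX pattern (the names here are hypothetical, not mwrap's):

	#include <pthread.h>

	static pthread_key_t key;

	static void dtor(void *p)
	{
		/* runs at thread exit; the key's slot reads as NULL by then */
	}

	static void *thr(void *arg)
	{
		pthread_setspecific(key, arg); /* NULL values never trigger dtor */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_key_create(&key, dtor);
		pthread_create(&t, NULL, thr, (void *)1);
		return pthread_join(t, NULL); /* dtor ran before join returns */
	}
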
+
+ /* see httpd.h */
+ static void h1d_atfork_prepare(void);
+ static void h1d_atfork_parent(void);
+ static void h1d_start(void);
+
+ ATTR_COLD static void atfork_prepare(void)
+ {
+ 	h1d_atfork_prepare();
+ 	call_rcu_before_fork();
+ 	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+ }
+
+ ATTR_COLD static void atfork_parent(void)
+ {
+ 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ 	call_rcu_after_fork_parent();
+ 	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+ 	h1d_atfork_parent();
+ 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ }
+
+ ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */
+
+ ATTR_COLD static void atfork_child(void)
+ {
+ 	CHECK(int, 0, pthread_mutex_init(&global_mtx, 0));
+
+ 	/*
+ 	 * We should be the only active thread at this point.
+ 	 * Theoretically the application could register another atfork_child
+ 	 * handler which runs before this handler AND spawns a thread which
+ 	 * calls malloc; not much we can do about that, though.
+ 	 */
+ 	cds_list_splice(&arenas_active, &arenas_unused);
+ 	CDS_INIT_LIST_HEAD(&arenas_active);
+ 	if (ms_tsd) {
+ 		cds_list_del(&ms_tsd->arena_node); /* remove from unused */
+ 		cds_list_add(&ms_tsd->arena_node, &arenas_active);
+ 	}
+ 	reset_mutexes();
+ 	call_rcu_after_fork_child();
+ 	h1d_start();
+ }
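
The pthread_atfork(3) registration for these three handlers is not part of this hunk; presumably mwrap_core.h wires them up during startup along these lines:

	/* assumed registration site, e.g. in mwrap's initialization code */
	CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent, atfork_child));
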
+
+ #if defined(__GLIBC__)
+ # define FIRST_TIME 0
+ #else /* pthread_mutex_lock calls malloc on FreeBSD */
+ static int once;
+ /* true only on the very first call, when locking could recurse into malloc */
+ # define FIRST_TIME (!uatomic_cmpxchg(&once, 0, 1))
+ #endif
+
+ static __attribute__((noinline)) mstate mstate_acquire_harder(void)
+ {
+ 	bool do_lock = FIRST_TIME ? false : true;
+ 	if (do_lock)
+ 		CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+ 	if (cds_list_empty(&arenas_unused)) {
+ 		ms_tsd = create_mspace(0, 0);
+ 		ms_tsd->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+ 	} else { /* reuse existing */
+ 		ms_tsd = cds_list_first_entry(&arenas_unused,
+ 					struct malloc_state, arena_node);
+ 		cds_list_del(&ms_tsd->arena_node);
+ 	}
+
+ 	cds_list_add(&ms_tsd->arena_node, &arenas_active);
+ 	if (!tlskey)
+ 		CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
+
+ 	if (do_lock)
+ 		CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+ 	CHECK(int, 0, pthread_setspecific(tlskey, ms_tsd));
+ 	return ms_tsd;
+ }
+
+ /* process remote free requests; returns the number of allocations freed */
+ static size_t remote_free_step(mstate ms)
+ {
+ 	size_t nfree = 0;
+ 	struct cds_wfcq_node *node, *n;
+ 	struct __cds_wfcq_head tmp_hd;
+ 	struct cds_wfcq_tail tmp_tl;
+ 	enum cds_wfcq_ret ret;
+
+ 	___cds_wfcq_init(&tmp_hd, &tmp_tl);
+ 	ret = __cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
+ 					&ms->remote_free_head,
+ 					&ms->remote_free_tail);
+
+ 	if (ret == CDS_WFCQ_RET_DEST_EMPTY) {
+ 		__cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
+ 			++nfree;
+ 			mspace_free(ms, node);
+ 		}
+ 	}
+ 	mwrap_assert(ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+
+ 	return nfree;
+ }
+
+ static void remote_free_finish(mstate ms)
+ {
+ 	while (remote_free_step(ms)) ;
+ }
+
+ int malloc_trim(size_t pad)
+ {
+ 	mstate m;
+ 	int ret = 0;
+
+ 	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+
+ 	/* be lazy for active sibling threads, readers are not synchronized */
+ 	cds_list_for_each_entry(m, &arenas_active, arena_node)
+ 		uatomic_set(&m->trim_check, 0);
+
+ 	/* nobody is using idle arenas, clean immediately */
+ 	cds_list_for_each_entry(m, &arenas_unused, arena_node) {
+ 		m->trim_check = 0;
+ 		remote_free_finish(m);
+ 		ret |= sys_trim(m, pad);
+ 	}
+
+ 	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+
+ 	m = ms_tsd;
+ 	if (m) { /* trim our own arena immediately */
+ 		remote_free_finish(m);
+ 		ret |= sys_trim(m, pad);
+ 	}
+ 	return ret;
+ }
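
Since this defines the same symbol glibc exports, existing malloc_trim(3) callers work unchanged; pad is the slack, in bytes, each arena may keep. A caller sketch:

	#include <malloc.h> /* glibc declares malloc_trim(3) here */

	/* hypothetical caller: after dropping a large cache, release unused pages */
	static void shrink_heap(void)
	{
		int released = malloc_trim(0); /* pad 0: trim as much as possible */

		(void)released; /* non-zero: at least one arena returned memory */
	}
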
+
+ static void remote_free_enqueue(mstate fm, void *mem)
+ {
+ 	struct cds_wfcq_node *node = mem;
+
+ 	cds_wfcq_node_init(node);
+ 	cds_wfcq_enqueue(&fm->remote_free_head, &fm->remote_free_tail, node);
+ 	/* other thread calls remote_free_step */
+ }
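
remote_free_enqueue is the producer half of the wait-free handoff and remote_free_step above is the consumer half. Stripped of the allocator, the underlying urcu wfcqueue pattern looks roughly like this standalone sketch (not mwrap code; mwrap splices the whole queue at once instead of dequeueing node by node):

	#include <urcu/wfcqueue.h>

	static struct cds_wfcq_head head;
	static struct cds_wfcq_tail tail;

	static void queue_init(void) { cds_wfcq_init(&head, &tail); }

	/* any thread: wait-free enqueue, as in remote_free_enqueue */
	static void produce(struct cds_wfcq_node *node)
	{
		cds_wfcq_node_init(node);
		cds_wfcq_enqueue(&head, &tail, node);
	}

	/* owning thread: drain the queue, as remote_free_step does via splice */
	static void consume(void)
	{
		struct cds_wfcq_node *node;

		while ((node = cds_wfcq_dequeue_blocking(&head, &tail)) != NULL)
			; /* process node, e.g. hand it to mspace_free */
	}
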
+
+ static void *real_malloc(size_t bytes)
+ {
+ 	mstate ms = ms_tsd;
+ 	if (caa_unlikely(!ms))
+ 		ms = mstate_acquire_harder();
+
+ 	remote_free_step(ms);
+ 	return mspace_malloc(ms, bytes);
+ }
+
+ static void real_free(void *mem)
+ {
+ 	mstate ms = ms_tsd;
+ 	if (mem) {
+ 		mchunkptr p = mem2chunk(mem);
+ 		mstate fm = get_mstate_for(p);
+ 		if (fm == ms)
+ 			mspace_free(fm, mem);
+ 		else
+ 			remote_free_enqueue(fm, mem);
+ 	}
+ 	if (ms)
+ 		remote_free_step(ms);
+ }
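
Putting the pieces together: a free(3) from a non-owning thread never touches the owner's arena directly, it only enqueues. A hedged end-to-end example, assuming mwrap's wrappers (elsewhere in the package) route malloc/free to real_malloc/real_free:

	#include <pthread.h>
	#include <stdlib.h>

	static void *worker(void *p)
	{
		free(p); /* not the owner: lands on the owner's remote_free queue */
		return NULL;
	}

	int main(void)
	{
		void *p = malloc(1024); /* owned by the main thread's arena */
		pthread_t t;

		pthread_create(&t, NULL, worker, p);
		pthread_join(t, NULL);
		free(malloc(1)); /* main's next malloc/free drains its remote queue */
		return 0;
	}
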
@@ -0,0 +1,92 @@
+ /*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ * Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #ifndef picohttpparser_h
+ #define picohttpparser_h
+
+ #include <sys/types.h>
+
+ #ifdef _MSC_VER
+ #define ssize_t intptr_t
+ #endif
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /* contains name and value of a header (name == NULL if it is a continuing
+  * line of a multiline header) */
+ struct phr_header {
+ 	const char *name;
+ 	size_t name_len;
+ 	const char *value;
+ 	size_t value_len;
+ };
+
+ /* returns number of bytes consumed if successful, -2 if request is partial,
+  * -1 if failed */
+ static
+ int phr_parse_request(const char *buf, size_t len, const char **method,
+                       size_t *method_len, const char **path, size_t *path_len,
+                       int *minor_version, struct phr_header *headers,
+                       size_t *num_headers, size_t last_len);
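
A typical call, following picohttpparser's documented usage (last_len is the buffer length at the previous parse attempt, or 0 on the first, which enables incremental parsing as more bytes arrive):

	/* hypothetical wrapper: parse the request bytes accumulated in (buf, buflen) */
	static int try_parse(const char *buf, size_t buflen, size_t last_len)
	{
		const char *method, *path;
		size_t method_len, path_len;
		int minor_version;
		struct phr_header headers[16];
		size_t num_headers = sizeof(headers) / sizeof(headers[0]); /* in: capacity; out: count */

		return phr_parse_request(buf, buflen, &method, &method_len,
					&path, &path_len, &minor_version,
					headers, &num_headers, last_len);
		/* > 0: bytes consumed; -2: partial, read more; -1: parse error */
	}
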
+
+ /* ditto */
+ static inline
+ int phr_parse_response(const char *_buf, size_t len, int *minor_version,
+                        int *status, const char **msg, size_t *msg_len,
+                        struct phr_header *headers, size_t *num_headers,
+                        size_t last_len);
+
+ /* ditto */
+ static inline
+ int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers,
+                       size_t *num_headers, size_t last_len);
+
+ /* should be zero-filled before start */
+ struct phr_chunked_decoder {
+ 	size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
+ 	char consume_trailer;       /* if trailing headers should be consumed */
+ 	char _hex_count;
+ 	char _state;
+ };
+
+ /* the function rewrites the buffer given as (buf, bufsz), removing the
+  * chunked-encoding headers.  When the function returns without an error,
+  * bufsz is updated to the length of the decoded data available.  Applications
+  * should repeatedly call the function while it returns -2 (incomplete),
+  * supplying newly arrived data each time.  When the end of the chunked-encoded
+  * data is found, the function returns a non-negative number indicating the
+  * number of octets left undecoded, which start at the offset indicated by
+  * the updated *bufsz.  Returns -1 on error.
+  */
+ static inline
+ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
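
An illustrative decode loop built on the contract above (sock and the buffer handling are hypothetical; decoded bytes are simply discarded here):

	#include <unistd.h> /* read(2) */

	static int drain_chunked_body(int sock)
	{
		struct phr_chunked_decoder decoder = {0}; /* must be zero-filled */
		char buf[8192];
		ssize_t rret = -2;

		decoder.consume_trailer = 1; /* swallow trailing headers */
		while (rret == -2) { /* -2: incomplete, feed newly arrived data */
			ssize_t nread = read(sock, buf, sizeof(buf));
			size_t bufsz;

			if (nread <= 0)
				return -1; /* EOF or error before the body ended */
			bufsz = (size_t)nread;
			rret = phr_decode_chunked(&decoder, buf, &bufsz);
			/* buf[0 .. bufsz) now holds decoded bytes from this read */
		}
		return rret >= 0 ? 0 : -1; /* rret octets remain undecoded on success */
	}
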
+
+ /* returns non-zero if the chunked decoder is in the middle of chunked data */
+ static inline
+ int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif