mwrap 2.3.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1095 @@
1
+ /*
2
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
3
+ * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
4
+ * Disclaimer: I don't really know my way around XS or Perl internals well
5
+ */
6
+ #define _LGPL_SOURCE /* allows URCU to inline some stuff */
7
+ #define _GNU_SOURCE
8
+ #include "mymalloc.h" /* includes dlmalloc_c.h */
9
+ #ifndef MWRAP_PERL
10
+ # define MWRAP_PERL 0
11
+ #endif
12
+
13
+ #ifndef MWRAP_RUBY
14
+ # define MWRAP_RUBY 0
15
+ #endif
16
+
17
+ /* set a sensible max to avoid stack overflows */
18
+ #ifndef MWRAP_BT_MAX
19
+ # define MWRAP_BT_MAX 32
20
+ #endif
21
+
22
+ #ifndef _GNU_SOURCE
23
+ # define _GNU_SOURCE
24
+ #endif
25
+ #include <execinfo.h>
26
+ #include <stdio.h>
27
+ #include <stdlib.h>
28
+ #include <string.h>
29
+ #include <assert.h>
30
+ #include <errno.h>
31
+ #include <sys/types.h>
32
+ #include <sys/stat.h>
33
+ #include <fcntl.h>
34
+ #include <pthread.h>
35
+ #include <signal.h>
36
+ #include <urcu-bp.h>
37
+ #include <urcu/rculfhash.h>
38
+ #include <urcu/rculist.h>
39
+ #include <limits.h>
40
+
41
+ #if MWRAP_PERL
42
+ # include "EXTERN.h"
43
+ # include "perl.h"
44
+ # include "XSUB.h"
45
+ # include "embed.h"
46
+ # include "ppport.h"
47
+ #endif
48
+
49
+ /*
50
+ * XXH3 (truncated to 32-bits) seems to provide a ~2% speedup.
51
+ * XXH32 doesn't show improvements over jhash despite rculfhash
52
+ * only supporting 32-bit hash values.
53
+ */
54
+ #if defined(HAVE_XXHASH)
55
+ # define XXH_INLINE_ALL
56
+ # include <xxhash.h>
57
+ # if !defined(XXH3_64bits)
58
+ # warning XXH3_64bits not defined
59
+ # endif
60
+ #endif
61
+
62
+ #if !defined(XXH3_64bits)
63
+ # include "jhash.h"
64
+ #endif
65
+
66
/*
 * Largest value that fits in the 24-bit src_loc.lineno bitfield.
 * (1U << 24) itself needs 25 bits and would be truncated to 0 when stored.
 */
#define U24_MAX ((1U << 24) - 1)
67
+
68
+ /*
69
+ * Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
70
+ * Instead, the relative age of an object is the number of total bytes
71
+ * allocated (and we don't care about overflow on 32-bit since
72
+ * hardly anybody still uses it).
73
+ */
74
+ static size_t total_bytes_inc, total_bytes_dec, nr_file, nr_src_loc;
75
+ static uint32_t bt_req_depth;
76
+
77
+ #if MWRAP_PERL
78
+ extern pthread_key_t __attribute__((weak)) PL_thr_key;
79
+ extern const char __attribute__((weak)) PL_memory_wrap[]; /* needed for -O0 */
80
+ # if !defined(PERL_IMPLICIT_CONTEXT)
81
+ static size_t *root_locating; /* determines if PL_curcop is our thread */
82
+ # endif
83
+ #endif /* MWRAP_PERL */
84
+
85
+ #if MWRAP_RUBY
86
+ static void mw_ruby_set_generation(size_t *, size_t);
87
+ # define SET_GENERATION(gen, size) mw_ruby_set_generation(gen, size)
88
+ static size_t last_gc_count; /* for httpd which runs in a non-GVL thread */
89
+ #endif /* MWRAP_RUBY */
90
+
91
+ #ifndef SET_GENERATION /* C-only builds w/o Perl|Ruby */
92
+ # define SET_GENERATION(gen, size) \
93
+ *gen = uatomic_add_return(&total_bytes_inc, size)
94
+ #endif /* !SET_GENERATION */
95
+
96
+ /* generic stuff: */
97
+ static MWRAP_TSD size_t locating;
98
+ static struct cds_lfht *files, *totals;
99
/* one slot of the mutex pool; the union makes every slot at least 64 bytes */
+ union padded_mutex {
100
+ pthread_mutex_t mtx;
101
+ char pad[64]; /* cache alignment for common CPUs */
102
+ };
103
+
104
+ /* a pool of mutexes for all "struct src_loc" */
105
+ #define MUTEX_NR (1 << 6)
106
+ #define MUTEX_MASK (MUTEX_NR - 1)
107
+ static union padded_mutex mutexes[MUTEX_NR] = {
108
+ [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
109
+ };
110
+
111
+ #ifdef static_assert
112
+ /* we only use uint32_t for pathname storage for struct alignment */
113
+ static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
114
+ #endif
115
+
116
+ static struct cds_lfht *lfht_new(size_t size)
117
+ {
118
+ return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
119
+ }
120
+
121
+ static void reset_mutexes(void)
122
+ {
123
+ size_t i;
124
+
125
+ for (i = 0; i < MUTEX_NR; i++)
126
+ CHECK(int, 0, pthread_mutex_init(&mutexes[i].mtx, 0));
127
+ }
128
+
129
+ #ifndef HAVE_MEMPCPY
130
/*
 * Fallback for mempcpy(3): copy n bytes from src to dest and return a
 * pointer to the byte following the last byte written.
 */
static void *my_mempcpy(void *dest, const void *src, size_t n)
{
	char *out = memcpy(dest, src, n);

	return out + n;
}
134
+ #define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
135
+ #endif
136
+
137
+ /* stolen from glibc: */
138
+ #define RETURN_ADDRESS(nr) \
139
+ __builtin_extract_return_addr(__builtin_return_address(nr))
140
+
141
+
142
/*
 * Declare an on-stack `union stk_bt bt' in the calling function and fill
 * bt.sl.bt[] / bt.sl.bt_len with the caller's return address(es).
 * Depth is forced to 1 while `locating' is non-zero; otherwise it is the
 * configured bt_req_depth.  Must be expanded directly inside the *alloc
 * entry point since it relies on RETURN_ADDRESS(0) of that function.
 */
+ #define SRC_LOC_BT(bt) union stk_bt bt; do { \
143
+ uint32_t depth = locating ? 1 : CMM_LOAD_SHARED(bt_req_depth); \
144
+ switch (depth) { \
145
+ case 0: /* no depth requested: treated like depth 1 */ \
146
+ case 1: bt.sl.bt_len = 1; bt.sl.bt[0] = RETURN_ADDRESS(0); break; \
147
+ default: /* skip 1st level of BT since that's our function */ \
148
+ mwrap_assert(depth <= MWRAP_BT_MAX); \
149
+ ++locating; /* allocations made inside backtrace() take the depth-1 path */ \
150
+ long n = (long)backtrace(bt_dst(&bt), depth); \
151
+ --locating; \
152
+ bt.sl.bt_len = n <= 1 ? 0 : (uint32_t)n - 1; /* frame 0 landed in the slot before bt[0] */ \
153
+ if (n > 1) mwrap_assert(bt.sl.bt[0] == RETURN_ADDRESS(0)); \
154
+ } \
155
+ } while (0)
156
+
157
+ /*
158
+ * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols* files
159
+ * Allocated via real_malloc / real_free
160
+ */
161
/* one interned source pathname; compared by fn_len plus the fn bytes (see fn_eq) */
+ struct src_file {
162
+ struct cds_lfht_node nd; /* <=> files table */
163
+ uint32_t fn_hash; /* do_hash() over the fn bytes, used as the table hash */
164
+ uint32_t fn_len; /* < PATH_MAX */
165
+ char fn[]; /* NUL-terminated */
166
+ };
167
+
168
+ /* allocated via real_malloc, immortal for safety reasons */
169
/*
 * Per-call-site statistics record.  The identity of a location is the byte
 * range starting right after bt_len: lineno, f and bt[0..bt_len-1]; that
 * range is what gets hashed and compared, so the layout must stay packed.
 */
+ struct src_loc {
170
+ size_t total; /* bytes allocated from this location */
171
+ size_t freed_bytes; /* bytes released by free() */
172
+ size_t allocations; /* number of allocations */
173
+ size_t frees; /* number of frees */
174
+ size_t age_total; /* (age_total / frees) => mean age at free */
175
+ size_t max_lifespan; /* largest age seen at free, updated under the pool mutex */
176
+ struct cds_lfht_node hnode; /* <=> totals table */
177
+ struct cds_list_head allocs; /* <=> alloc_hdr.node */
178
+ uint32_t loc_hash; /* hash of the identity bytes; also selects the pool mutex */
179
+ uint8_t bt_len; /* number of valid entries in bt[] */
180
+ /* next 3 fields contiguous for hash_src_loc(): */
181
+ unsigned lineno:24; /* nobody should have >=16.7M LoC in one file */
182
+ struct src_file *f; /* NULL when the location is a plain C address */
183
+ void *bt[]; /* return addresses, bt_len entries */
184
+ } __attribute__((packed,aligned(8)));
185
+
186
+ /* sizeof() doesn't work on bitfields */
187
+ #define SIZEOF_LINENO (size_t)(24 / 8)
188
+
189
+ /*
190
+ * Every allocation has this in the header, maintain alignment with malloc
191
+ * Do not expose this to Perl code because of use-after-free concerns.
192
+ */
193
/* header placed immediately before every pointer handed out to the application */
+ struct alloc_hdr {
194
+ struct cds_list_head anode; /* <=> src_loc.allocs */
195
+ union {
196
+ struct {
197
+ size_t gen; /* global age || rb_gc_count() */
198
+ struct src_loc *loc; /* NULL when the allocation is not tracked */
199
+ } live;
200
+ struct rcu_head dead; /* used by call_rcu() once the allocation is freed */
201
+ } as;
202
+ void *real; /* what to call real_free on (exists for *memalign) */
203
+ size_t size; /* size requested by the caller; set to 0 by free() */
204
+ };
205
+
206
+ /* on-stack structures */
207
/* stack storage for a struct src_file key plus room for its trailing fn[] bytes */
+ union stk_sf {
208
+ struct src_file sf;
209
+ char buf_[sizeof(struct src_file) + PATH_MAX];
210
+ };
211
+
212
/* stack storage for a struct src_loc key with room for MWRAP_BT_MAX-1 bt[] entries */
+ union stk_bt {
213
+ struct src_loc sl;
214
+ /* we subtract one level from MWRAP_BT_MAX since we discard one
215
+ * level of backtrace(3) (see below for why) */
216
+ char buf_[sizeof(struct src_loc) + sizeof(void *) * (MWRAP_BT_MAX-1)];
217
+ };
218
+
219
+ /*
220
+ * we discard the 1st-level of the backtrace(3) since it's our *alloc
221
+ * function (and therefore uninteresting), so we want backtrace(3) to
222
+ * write to bt->sl.bt[-1] so that bt->sl.bt[0] is the first interesting
223
+ * thing.
224
+ */
225
+ #ifdef static_assert
226
+ static_assert(offsetof(struct src_loc, f) + sizeof(void *) ==
227
+ offsetof(struct src_loc, bt),
228
+ "bt lineno is is bt[-1]");
229
+ #endif
230
+ static void **bt_dst(union stk_bt *bt)
231
+ {
232
+ return (void **)&bt->sl.f;
233
+ }
234
+
235
+ static struct alloc_hdr *ptr2hdr(void *p)
236
+ {
237
+ return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
238
+ }
239
+
240
+ static void *hdr2ptr(struct alloc_hdr *h)
241
+ {
242
+ return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
243
+ }
244
+
245
+ static int loc_is_addr(const struct src_loc *l)
246
+ {
247
+ return l->f == NULL;
248
+ }
249
+
250
+ static size_t bt_bytelen(const struct src_loc *l)
251
+ {
252
+ return sizeof(l->bt[0]) * l->bt_len;
253
+ }
254
+
255
+ static size_t src_loc_hash_len(const struct src_loc *l)
256
+ {
257
+ return sizeof(l->f) + SIZEOF_LINENO + bt_bytelen(l);
258
+ }
259
+
260
+ static void *src_loc_hash_tip(const struct src_loc *l)
261
+ {
262
+ return (void *)((uintptr_t)&l->bt_len + sizeof(l->bt_len));
263
+ }
264
+
265
+ static int loc_eq(struct cds_lfht_node *node, const void *key)
266
+ {
267
+ const struct src_loc *existing;
268
+ const struct src_loc *k = key;
269
+
270
+ existing = caa_container_of(node, struct src_loc, hnode);
271
+
272
+ return (k->bt_len == existing->bt_len &&
273
+ !memcmp(src_loc_hash_tip(k), src_loc_hash_tip(existing),
274
+ src_loc_hash_len(k)));
275
+ }
276
+
277
+ static int fn_eq(struct cds_lfht_node *node, const void *key)
278
+ {
279
+ const struct src_file *existing;
280
+ const struct src_file *k = key;
281
+
282
+ existing = caa_container_of(node, struct src_file, nd);
283
+
284
+ return (k->fn_len == existing->fn_len &&
285
+ !memcmp(k->fn, existing->fn, k->fn_len));
286
+ }
287
+
288
+ static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k)
289
+ {
290
+ struct cds_lfht_iter iter;
291
+ struct cds_lfht_node *cur;
292
+
293
+ mwrap_assert(rcu_read_ongoing());
294
+ cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter);
295
+ cur = cds_lfht_iter_get_node(&iter);
296
+ return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL;
297
+ }
298
+
299
+ static struct src_loc *totals_add_rcu(const struct src_loc *k)
300
+ {
301
+ struct src_loc *l;
302
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
303
+ if (!t) return NULL;
304
+
305
+ again:
306
+ l = src_loc_get(t, k);
307
+ if (l) {
308
+ uatomic_add(&l->total, k->total);
309
+ uatomic_inc(&l->allocations);
310
+ } else {
311
+ size_t n = bt_bytelen(k) + sizeof(*k);
312
+ struct cds_lfht_node *cur;
313
+
314
+ l = real_malloc(n);
315
+ if (!l) return l;
316
+ memcpy(l, k, n);
317
+ l->freed_bytes = 0;
318
+ l->age_total = 0;
319
+ l->max_lifespan = 0;
320
+ l->freed_bytes = 0;
321
+ l->frees = 0;
322
+ l->allocations = 1;
323
+ CDS_INIT_LIST_HEAD(&l->allocs);
324
+ cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode);
325
+ if (cur == &l->hnode) {
326
+ uatomic_inc(&nr_src_loc);
327
+ } else { /* lost race */
328
+ rcu_read_unlock();
329
+ real_free(l);
330
+ rcu_read_lock();
331
+ goto again;
332
+ }
333
+ }
334
+ return l;
335
+ }
336
+
337
/*
 * 32-bit hash of `len' bytes at `p': one 32-bit half of XXH3_64bits()
 * when xxhash is available, jhash() otherwise.
 */
static uint32_t do_hash(const void *p, size_t len)
{
#if defined(XXH3_64bits)
	union {
		XXH64_hash_t u64;
		uint32_t u32[2];
	} h;

	h.u64 = XXH3_64bits(p, len);
	return h.u32[1];
#else
	uint32_t h = jhash(p, len, 0xdeadbeef);

	return h;
#endif
}
350
+
351
+ static void hash_src_loc(struct src_loc *l)
352
+ {
353
+ l->loc_hash = do_hash(src_loc_hash_tip(l), src_loc_hash_len(l));
354
+ }
355
+
356
+ static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
357
+ const char *fn, size_t fn_len)
358
+ {
359
+ struct cds_lfht_iter iter;
360
+ struct cds_lfht_node *cur;
361
+
362
+ mwrap_assert(t); /* caller should've bailed if missing */
363
+ if (fn_len >= PATH_MAX)
364
+ return NULL;
365
+ k->fn_len = (uint32_t)fn_len;
366
+ memcpy(k->fn, fn, fn_len);
367
+ k->fn[fn_len] = 0;
368
+ k->fn_hash = do_hash(k->fn, fn_len);
369
+ mwrap_assert(rcu_read_ongoing());
370
+ cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter);
371
+ cur = cds_lfht_iter_get_node(&iter);
372
+
373
+ return cur ? caa_container_of(cur, struct src_file, nd) : NULL;
374
+ }
375
+
376
+ #if MWRAP_PERL
377
+ static const COP *mwp_curcop(void)
378
+ {
379
+ if (&PL_thr_key) { /* are we even in a Perl process? */
380
+ # ifdef PERL_IMPLICIT_CONTEXT
381
+ if (aTHX) return PL_curcop;
382
+ # else /* !PERL_IMPLICIT_CONTEXT */
383
+ if (&locating == root_locating) return PL_curcop;
384
+ # endif /* PERL_IMPLICIT_CONTEXT */
385
+ }
386
+ return NULL;
387
+ }
388
+
389
+ static const char *mw_perl_src_file_cstr(unsigned *lineno)
390
+ {
391
+ const COP *cop = mwp_curcop();
392
+ if (!cop) return NULL;
393
+ const char *fn = CopFILE(cop);
394
+ if (!fn) return NULL;
395
+ *lineno = CopLINE(cop);
396
+ return fn;
397
+ }
398
+ # define SRC_FILE_CSTR(lineno) mw_perl_src_file_cstr(lineno)
399
+ #endif /* MWRAP_PERL */
400
+
401
+ #if MWRAP_RUBY
402
+ static const char *mw_ruby_src_file_cstr(unsigned *lineno);
403
+ # define SRC_FILE_CSTR(lineno) mw_ruby_src_file_cstr(lineno)
404
+ #endif /* MWRAP_RUBY */
405
+
406
+ #ifndef SRC_FILE_CSTR /* for C-only compilation */
407
+ # define SRC_FILE_CSTR(lineno) (NULL)
408
+ #endif /* !SRC_FILE_CSTR */
409
+
410
+ static struct src_loc *assign_line(size_t size, struct src_loc *sl,
411
+ const char *fn, unsigned lineno)
412
+ {
413
+ struct src_file *f;
414
+ union stk_sf sf;
415
+ struct cds_lfht_node *cur;
416
+ struct cds_lfht *t = CMM_LOAD_SHARED(files);
417
+
418
+ mwrap_assert(t);
419
+
420
+ size_t len = strlen(fn);
421
+ if (len >= PATH_MAX)
422
+ len = PATH_MAX - 1;
423
+
424
+ if (lineno == UINT_MAX) { /* NOLINE in Perl is UINT_MAX */
425
+ lineno = U24_MAX;
426
+ } else if (lineno > U24_MAX) {
427
+ fprintf(stderr,
428
+ "%s:%u line number exceeds limit (%u), capped\n",
429
+ fn, lineno, U24_MAX);
430
+ lineno = U24_MAX;
431
+ }
432
+ again:
433
+ f = src_file_get(t, &sf.sf, fn, len);
434
+ if (!f) { /* doesn't exist, add a new one */
435
+ f = real_malloc(sizeof(*f) + len + 1);
436
+ if (!f) return NULL;
437
+ memcpy(f, &sf.sf, sizeof(*f) + len + 1);
438
+ cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd);
439
+ if (cur == &f->nd) {
440
+ uatomic_inc(&nr_file);
441
+ } else { /* lost race */
442
+ rcu_read_unlock();
443
+ real_free(f);
444
+ rcu_read_lock();
445
+ goto again;
446
+ }
447
+ }
448
+
449
+ sl->total = size;
450
+ sl->f = f;
451
+ sl->lineno = lineno;
452
+ if (f && !bt_req_depth)
453
+ sl->bt_len = 0;
454
+ hash_src_loc(sl);
455
+ return totals_add_rcu(sl);
456
+ }
457
+
458
+ static struct src_loc *
459
+ update_stats_rcu_lock(size_t *gen, size_t size, struct src_loc *sl)
460
+ {
461
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
462
+ struct src_loc *ret = NULL;
463
+
464
+ if (caa_unlikely(!t)) return 0; /* not initialized */
465
+ if (locating++) goto out; /* do not recurse into another *alloc */
466
+
467
+ SET_GENERATION(gen, size);
468
+
469
+ unsigned lineno;
470
+ const char *fn = SRC_FILE_CSTR(&lineno);
471
+
472
+ rcu_read_lock();
473
+ if (fn)
474
+ ret = assign_line(size, sl, fn, lineno);
475
+ if (!ret) { /* no associated Perl|Ruby code, just C/C++ */
476
+ sl->total = size;
477
+ sl->f = NULL;
478
+ sl->lineno = 0;
479
+ hash_src_loc(sl);
480
+ ret = totals_add_rcu(sl);
481
+ }
482
+ out:
483
+ --locating;
484
+ return ret;
485
+ }
486
+
487
+ size_t malloc_usable_size(void *p)
488
+ {
489
+ return ptr2hdr(p)->size;
490
+ }
491
+
492
+ static void free_hdr_rcu(struct rcu_head *dead)
493
+ {
494
+ struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
495
+ real_free(h->real);
496
+ }
497
+
498
+ static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l)
499
+ {
500
+ pthread_mutex_t *mtx = &mutexes[l->loc_hash & MUTEX_MASK].mtx;
501
+ CHECK(int, 0, pthread_mutex_lock(mtx));
502
+ return mtx;
503
+ }
504
+
505
/*
 * free(3) replacement.  NULL is a no-op.  For a tracked allocation (its
 * header has a src_loc) the global and per-location counters are updated,
 * the header is unlinked from the location's list under the pool mutex and
 * the memory is released later through call_rcu().  Untracked allocations
 * go straight to real_free().
 */
+ void free(void *p)
506
+ {
507
+ if (p) {
508
+ struct alloc_hdr *h = ptr2hdr(p);
509
+ struct src_loc *l = h->as.live.loc;
510
+
511
+ if (l) {
512
+ size_t current_bytes = uatomic_read(&total_bytes_inc);
513
+ size_t age = current_bytes - h->as.live.gen; /* age is measured in bytes allocated since this allocation */
514
+ uatomic_add(&total_bytes_dec, h->size);
515
+ uatomic_add(&l->freed_bytes, h->size);
516
+ uatomic_set(&h->size, 0);
517
+ uatomic_inc(&l->frees);
518
+ uatomic_add(&l->age_total, age);
519
+
520
+ pthread_mutex_t *mtx = src_loc_mutex_lock(l); /* protects l->allocs and max_lifespan */
521
+ cds_list_del_rcu(&h->anode);
522
+ if (age > l->max_lifespan)
523
+ l->max_lifespan = age;
524
+ CHECK(int, 0, pthread_mutex_unlock(mtx));
525
+
526
+ call_rcu(&h->as.dead, free_hdr_rcu); /* as.dead overlays as.live: do not touch as.live after this */
527
+ } else {
528
+ real_free(h->real);
529
+ }
530
+ }
531
+ }
532
+
533
+ static void
534
+ alloc_insert_rcu(struct src_loc *sl, struct alloc_hdr *h, size_t size,
535
+ void *real)
536
+ {
537
+ h->size = size;
538
+ h->real = real;
539
+ size_t gen = 0;
540
+ struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
541
+ h->as.live.loc = l;
542
+ h->as.live.gen = gen;
543
+ if (l) {
544
+ pthread_mutex_t *mtx = src_loc_mutex_lock(l);
545
+ cds_list_add_rcu(&h->anode, &l->allocs);
546
+ CHECK(int, 0, pthread_mutex_unlock(mtx));
547
+ rcu_read_unlock();
548
+ }
549
+ }
550
+
551
/* true when `ptr' is a multiple of `alignment' (which must be a power of two) */
static bool ptr_is_aligned(void *ptr, size_t alignment)
{
	const uintptr_t mask = alignment - 1;

	return !((uintptr_t)ptr & mask);
}
555
+
556
/* round `ptr' up to the next multiple of `alignment' (a power of two) */
static void *ptr_align(void *ptr, size_t alignment)
{
	const uintptr_t mask = alignment - 1;
	uintptr_t addr = (uintptr_t)ptr + mask;

	return (void *)(addr & ~mask);
}
560
+
561
/*
 * true when `n' has exactly one bit set.  Zero is not a power of two and
 * is rejected here rather than relying on every caller to check first.
 */
static bool is_power_of_two(size_t n)
{
	return n != 0 && (n & (n - 1)) == 0;
}
565
+
566
+ static int
567
+ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
568
+ {
569
+ void *real;
570
+ size_t asize;
571
+ size_t d = alignment / sizeof(void*);
572
+ size_t r = alignment % sizeof(void*);
573
+
574
+ if (r != 0 || d == 0 || !is_power_of_two(d))
575
+ return EINVAL;
576
+
577
+ if (alignment <= MALLOC_ALIGNMENT) {
578
+ void *p = malloc(size);
579
+ if (!p) return ENOMEM;
580
+ *pp = p;
581
+ return 0;
582
+ }
583
+ for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
584
+ ; /* double alignment until >= sizeof(struct alloc_hdr) */
585
+ if (__builtin_add_overflow(size, alignment, &asize) ||
586
+ __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
587
+ return ENOMEM;
588
+
589
+ real = real_malloc(asize);
590
+ if (real) {
591
+ void *p = hdr2ptr(real);
592
+ if (!ptr_is_aligned(p, alignment))
593
+ p = ptr_align(p, alignment);
594
+ struct alloc_hdr *h = ptr2hdr(p);
595
+ alloc_insert_rcu(sl, h, size, real);
596
+ *pp = p;
597
+ }
598
+
599
+ return real ? 0 : ENOMEM;
600
+ }
601
+
602
/* translate an mwrap_memalign() error code into errno and pass `p' through */
static void *memalign_result(int err, void *p)
{
	if (caa_unlikely(err != 0))
		errno = err;

	return p;
}
608
+
609
+ void *memalign(size_t alignment, size_t size)
610
+ {
611
+ void *p = NULL;
612
+ SRC_LOC_BT(bt);
613
+ int err = mwrap_memalign(&p, alignment, size, &bt.sl);
614
+ return memalign_result(err, p);
615
+ }
616
+
617
+ int posix_memalign(void **p, size_t alignment, size_t size)
618
+ {
619
+ SRC_LOC_BT(bt);
620
+ return mwrap_memalign(p, alignment, size, &bt.sl);
621
+ }
622
+
623
+ /* these aliases aren't needed for glibc, not sure about other libcs... */
624
+ void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
625
+ void cfree(void *) __attribute__((__nothrow__))
626
+ __attribute__((__leaf__)) __attribute__((alias("free")));
627
+
628
+ void *valloc(size_t size)
629
+ {
630
+ ensure_initialization();
631
+ SRC_LOC_BT(bt);
632
+ void *p = NULL;
633
+ int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
634
+ return memalign_result(err, p);
635
+ }
636
+
637
+ #if __GNUC__ < 7
638
+ # define add_overflow_p(a,b) __extension__({ \
639
+ __typeof__(a) _c; \
640
+ __builtin_add_overflow(a,b,&_c); \
641
+ })
642
+ #else
643
+ # define add_overflow_p(a,b) \
644
+ __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
645
+ #endif
646
+
647
/* round `size' up to a multiple of `alignment' (a power of two) */
static size_t size_align(size_t size, size_t alignment)
{
	const size_t mask = alignment - 1;

	return (size + mask) & ~mask;
}
651
+
652
+ void *pvalloc(size_t size)
653
+ {
654
+ void *p = NULL;
655
+
656
+ ensure_initialization();
657
+
658
+ if (add_overflow_p(size, mparams.page_size)) {
659
+ errno = ENOMEM;
660
+ return 0;
661
+ }
662
+ size = size_align(size, mparams.page_size);
663
+ SRC_LOC_BT(bt);
664
+ int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
665
+ return memalign_result(err, p);
666
+ }
667
+
668
+ void *malloc(size_t size)
669
+ {
670
+ size_t asize;
671
+
672
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
673
+ goto enomem;
674
+
675
+ void *p = real_malloc(asize);
676
+ if (p) {
677
+ SRC_LOC_BT(bt);
678
+ struct alloc_hdr *h = p;
679
+ alloc_insert_rcu(&bt.sl, h, size, h);
680
+ return hdr2ptr(h);
681
+ }
682
+ enomem:
683
+ errno = ENOMEM;
684
+ return 0;
685
+ }
686
+
687
+ void *calloc(size_t nmemb, size_t size)
688
+ {
689
+ size_t asize;
690
+
691
+ if (__builtin_mul_overflow(size, nmemb, &size))
692
+ goto enomem;
693
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
694
+ goto enomem;
695
+ void *p = real_malloc(asize);
696
+ if (p) {
697
+ struct alloc_hdr *h = p;
698
+ SRC_LOC_BT(bt);
699
+ alloc_insert_rcu(&bt.sl, h, size, h);
700
+ return memset(hdr2ptr(h), 0, size);
701
+ }
702
+ enomem:
703
+ errno = ENOMEM;
704
+ return 0;
705
+ }
706
+
707
+ void *realloc(void *ptr, size_t size)
708
+ {
709
+ size_t asize;
710
+
711
+ if (!size) {
712
+ free(ptr);
713
+ return 0;
714
+ }
715
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
716
+ goto enomem;
717
+ void *p = real_malloc(asize);
718
+ if (p) {
719
+ struct alloc_hdr *h = p;
720
+ SRC_LOC_BT(bt);
721
+ alloc_insert_rcu(&bt.sl, h, size, h);
722
+ p = hdr2ptr(h);
723
+ if (ptr) {
724
+ struct alloc_hdr *old = ptr2hdr(ptr);
725
+ memcpy(p, ptr, old->size < size ? old->size : size);
726
+ free(ptr);
727
+ }
728
+ return p;
729
+ }
730
+ enomem:
731
+ errno = ENOMEM;
732
+ return 0;
733
+ }
734
+
735
/* arguments for dump_to_file() */
+ struct dump_arg {
736
+ FILE *fp; /* stream the report is written to */
737
+ size_t min; /* locations with total <= min are not printed */
738
+ };
739
+
740
/*
 * Symbolize `size' addresses from `addrlist'.  Returns the array produced
 * by backtrace_symbols(3) (or the FreeBSD _fmt variant, formatted to
 * resemble glibc output), or NULL after printing a message to stderr.
 */
char **bt_syms(void * const *addrlist, uint32_t size)
{
	char **syms;

	mwrap_assert(size < INT_MAX);
#if defined(__GLIBC__)
	syms = backtrace_symbols(addrlist, size);
#else /* make FreeBSD look like glibc output: */
	syms = backtrace_symbols_fmt(addrlist, size, "%f(%n%D) [%a]");
#endif
	if (syms == NULL)
		fprintf(stderr, "backtrace_symbols: %m\n");
	return syms;
}
751
+
752
+ /* supported by modern gcc + clang */
753
+ #define AUTO_FREE __attribute__((__cleanup__(cleanup_free)))
754
/* __cleanup__ handler: `any' is the address of a pointer variable to free() */
static void cleanup_free(void *any)
{
	void *victim = *(void **)any;

	free(victim);
}
759
+
760
+ static void *dump_to_file(struct dump_arg *a)
761
+ {
762
+ struct cds_lfht_iter iter;
763
+ struct src_loc *l;
764
+ struct cds_lfht *t;
765
+
766
+ ++locating;
767
+ rcu_read_lock();
768
+ t = CMM_LOAD_SHARED(totals);
769
+ if (!t)
770
+ goto out_unlock;
771
+
772
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
773
+ if (l->total <= a->min) continue;
774
+
775
+ if (loc_is_addr(l)) {
776
+ AUTO_FREE char **s = bt_syms(l->bt, 1);
777
+
778
+ if (s)
779
+ fprintf(a->fp, "%16zu %12zu %s\n",
780
+ l->total, l->allocations, s[0]);
781
+ } else {
782
+ fprintf(a->fp, "%16zu %12zu %s:%u\n",
783
+ l->total, l->allocations, l->f->fn, l->lineno);
784
+ }
785
+ }
786
+ out_unlock:
787
+ rcu_read_unlock();
788
+ --locating;
789
+ return 0;
790
+ }
791
+
792
+ /* str = "/path/to/foo.so(+0x123) [0xdeadbeefcafe]" (see bt_syms()) */
793
/*
 * Parse the trailing "[0x<hex>]" of a bt_syms()-style string.
 *
 * str: text to scan (must be NUL-terminated at or after str + len)
 * len: number of bytes of `str' to consider
 * p:   receives the parsed address on success
 *
 * Returns 1 and sets *p on success; returns 0 (leaving *p untouched) when
 * there is no "[0x", no hex digit after it, the number is out of range,
 * or it is not immediately followed by ']' inside the buffer.
 */
static int extract_addr(const char *str, size_t len, void **p)
{
	const char *end = str + len;
	const char *c = memrchr(str, '[', len);
	const char *digits;
	unsigned long x;
	char *e;

	if (!c || (c + 2) >= end || c[1] != '0' || c[2] != 'x')
		return 0;

	digits = c + 3;
	errno = 0;
	x = strtoul(digits, &e, 16);
	/* e == digits means "[0x]": nothing was converted */
	if (errno || e == digits || e >= end || *e != ']')
		return 0;

	*p = (void *)(uintptr_t)x;
	return 1;
}
810
+
811
+ /* str is $PATHNAME:$LINENO, len is strlen(str) */
812
+ static struct src_loc *src_loc_lookup(const char *str, size_t len)
813
+ {
814
+ char *c = memrchr(str, ':', len);
815
+ const char *end = str + len;
816
+ unsigned lineno;
817
+ struct src_loc *l = NULL;
818
+ struct cds_lfht *t = CMM_LOAD_SHARED(files);
819
+ union stk_sf sf;
820
+
821
+ if (!c || c == end || !t)
822
+ return NULL;
823
+
824
+ size_t fn_len = c - str;
825
+ c++;
826
+ if (*c == '-') {
827
+ lineno = U24_MAX;
828
+ } else {
829
+ lineno = 0;
830
+ for (; c < end; c++) {
831
+ if (*c < '0' || *c > '9')
832
+ return NULL;
833
+ lineno *= 10;
834
+ lineno += (*c - '0');
835
+ }
836
+ if (lineno > U24_MAX)
837
+ return NULL;
838
+ }
839
+ rcu_read_lock();
840
+ struct src_file *f = src_file_get(t, &sf.sf, str, fn_len);
841
+ t = CMM_LOAD_SHARED(totals);
842
+ if (f && t) {
843
+ struct src_loc k;
844
+
845
+ k.f = f;
846
+ k.lineno = lineno;
847
+ k.bt_len = 0;
848
+ hash_src_loc(&k);
849
+ l = src_loc_get(t, &k);
850
+ }
851
+ rcu_read_unlock();
852
+ return l;
853
+ }
854
+
855
+ #ifndef O_CLOEXEC
856
+ # define O_CLOEXEC 0
857
+ #endif
858
+ static void h1d_atexit(void);
859
+ __attribute__ ((destructor)) static void mwrap_dtor(void)
860
+ {
861
+ const char *opt = getenv("MWRAP");
862
+ const char *modes[] = { "a", "a+", "w", "w+", "r+" };
863
+ struct dump_arg a = { .min = 0 };
864
+ size_t i;
865
+ int dump_fd;
866
+ char *dump_path;
867
+ char *s;
868
+
869
+ /* n.b. unsetenv("MWRAP") may be called, so run this unconditionally */
870
+ h1d_atexit();
871
+
872
+ if (!opt)
873
+ return;
874
+
875
+ ++locating;
876
+ if ((dump_path = strstr(opt, "dump_path:")) &&
877
+ (dump_path += sizeof("dump_path")) &&
878
+ *dump_path) {
879
+ char *end = strchr(dump_path, ',');
880
+ char buf[PATH_MAX];
881
+ if (end) {
882
+ mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf));
883
+ end = mempcpy(buf, dump_path, end - dump_path);
884
+ *end = 0;
885
+ dump_path = buf;
886
+ }
887
+ dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
888
+ 0666);
889
+ if (dump_fd < 0) {
890
+ fprintf(stderr, "open %s failed: %m\n", dump_path);
891
+ goto out;
892
+ }
893
+ }
894
+ else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
895
+ goto out;
896
+
897
+ if ((s = strstr(opt, "dump_min:")))
898
+ sscanf(s, "dump_min:%zu", &a.min);
899
+
900
+ switch (dump_fd) {
901
+ case 0: goto out;
902
+ case 1: a.fp = stdout; break;
903
+ case 2: a.fp = stderr; break;
904
+ default:
905
+ if (dump_fd < 0)
906
+ goto out;
907
+ a.fp = 0;
908
+
909
+ for (i = 0; !a.fp && i < 5; i++)
910
+ a.fp = fdopen(dump_fd, modes[i]);
911
+
912
+ if (!a.fp) {
913
+ fprintf(stderr, "failed to open fd=%d: %m\n", dump_fd);
914
+ goto out;
915
+ }
916
+ /* we'll leak some memory here, but this is a destructor */
917
+ }
918
+ dump_to_file(&a);
919
+ out:
920
+ --locating;
921
+ }
922
+
923
+ static void mwrap_reset(void)
924
+ {
925
+ struct cds_lfht *t;
926
+ struct cds_lfht_iter iter;
927
+ struct src_loc *l;
928
+
929
+ uatomic_set(&total_bytes_inc, 0);
930
+ uatomic_set(&total_bytes_dec, 0);
931
+
932
+ rcu_read_lock();
933
+ t = CMM_LOAD_SHARED(totals);
934
+ if (t)
935
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
936
+ uatomic_set(&l->total, 0);
937
+ uatomic_set(&l->allocations, 0);
938
+ uatomic_set(&l->frees, 0);
939
+ uatomic_set(&l->freed_bytes, 0);
940
+ uatomic_set(&l->age_total, 0);
941
+ uatomic_set(&l->max_lifespan, 0);
942
+ }
943
+ rcu_read_unlock();
944
+ }
945
+
946
+ static inline struct src_loc *mwrap_get(const char *str, size_t len)
947
+ {
948
+ void *p;
949
+
950
+ if (!extract_addr(str, len, &p))
951
+ return src_loc_lookup(str, len);
952
+
953
+ union stk_bt k;
954
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
955
+
956
+ if (!t) return NULL;
957
+ k.sl.f = NULL;
958
+ k.sl.lineno = 0;
959
+ k.sl.bt[0] = p;
960
+ k.sl.bt_len = 1;
961
+ hash_src_loc(&k.sl);
962
+ rcu_read_lock();
963
+ struct src_loc *l = src_loc_get(t, &k.sl);
964
+ rcu_read_unlock();
965
+ return l;
966
+ }
967
+
968
+ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
969
+ {
970
+ static const size_t min_len = sizeof(struct src_file *) + SIZEOF_LINENO;
971
+
972
+ if (len >= min_len && ((len - min_len) % sizeof(void *)) == 0) {
973
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
974
+ if (!t) return NULL;
975
+
976
+ union stk_bt k;
977
+ size_t bt_len = (len - min_len) / sizeof(void *);
978
+
979
+ if (bt_len > MWRAP_BT_MAX)
980
+ return NULL;
981
+ k.sl.bt_len = bt_len;
982
+
983
+ memcpy(src_loc_hash_tip(&k.sl), buf, len);
984
+ hash_src_loc(&k.sl);
985
+ rcu_read_lock();
986
+ struct src_loc *l = src_loc_get(t, &k.sl);
987
+ rcu_read_unlock();
988
+ return l;
989
+ }
990
+ return NULL;
991
+ }
992
+
993
+ static const char *mwrap_env;
994
+ #include "httpd.h"
995
+
996
/*
 * Library constructor: with all signals blocked and `locating' raised,
 * initializes the allocator, creates the files/totals tables, issues a
 * first call_rcu(), starts the httpd side (h1d_start), registers the fork
 * handlers and finally parses the "bt:N" option of the MWRAP environment
 * variable into bt_req_depth (capped at MWRAP_BT_MAX).
 */
+ __attribute__((constructor)) static void mwrap_ctor(void)
997
+ {
998
+ sigset_t set, old;
999
+ struct alloc_hdr *h;
1000
+ mwrap_env = getenv("MWRAP");
1001
+
1002
+ ++locating; /* allocations made during setup take the depth-1, untracked-recursion path */
1003
+
1004
+ /* block signals */
1005
+ CHECK(int, 0, sigfillset(&set));
1006
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old));
1007
+ ensure_initialization();
1008
+ CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
1009
+
1010
+ /* initialize mutexes used by urcu-bp */
1011
+ CMM_STORE_SHARED(files, lfht_new(256));
1012
+ if (!CMM_LOAD_SHARED(files))
1013
+ fprintf(stderr, "failed to allocate files table\n");
1014
+ CMM_STORE_SHARED(totals, lfht_new(16384));
1015
+ if (!CMM_LOAD_SHARED(totals))
1016
+ fprintf(stderr, "failed to allocate totals table\n");
1017
+ h = real_malloc(sizeof(struct alloc_hdr));
1018
+ if (h) { /* force call_rcu to start background thread */
1019
+ h->real = h; /* free_hdr_rcu() releases h->real, i.e. this dummy header */
1020
+ call_rcu(&h->as.dead, free_hdr_rcu);
1021
+ } else
1022
+ fprintf(stderr, "malloc: %m\n");
1023
+
1024
+ h1d_start();
1025
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL)); /* restore the caller's signal mask */
1026
+ CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
1027
+ atfork_child));
1028
+
1029
+ if (mwrap_env) {
1030
+ const char *bt = strstr(mwrap_env, "bt:");
1031
+ if (bt) {
1032
+ bt += sizeof("bt"); /* sizeof includes the NUL, so this also skips the ':' */
1033
+ errno = 0;
1034
+ char *end;
1035
+ unsigned long n = strtoul(bt, &end, 10);
1036
+ if (n && !errno && (*end == ',' || *end == 0)) {
1037
+ if (n > MWRAP_BT_MAX)
1038
+ n = MWRAP_BT_MAX;
1039
+ CMM_STORE_SHARED(bt_req_depth, (uint32_t)n);
1040
+ }
1041
+ }
1042
+ }
1043
+ --locating;
1044
+ }
1045
+
1046
+ #if MWRAP_RUBY
1047
+ # undef _GNU_SOURCE /* ruby.h redefines it */
1048
+ # include <ruby.h> /* defines HAVE_RUBY_RACTOR_H on 3.0+ */
1049
+ # include <ruby/thread.h>
1050
+ # include <ruby/io.h>
1051
+ # ifdef HAVE_RUBY_RACTOR_H /* Ruby 3.0+ */
1052
+ extern MWRAP_TSD void * __attribute__((weak)) ruby_current_ec;
1053
+ # else /* Ruby 2.6-2.7 */
1054
+ extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
1055
+ # define ruby_current_ec ruby_current_execution_context_ptr
1056
+ # endif /* HAVE_RUBY_RACTOR_H */
1057
+
1058
+ extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
1059
+ extern size_t __attribute__((weak)) rb_gc_count(void);
1060
+ int __attribute__((weak)) ruby_thread_has_gvl_p(void);
1061
+
1062
+ const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev or later */
1063
+ /*
1064
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
1065
+ * to have a native thread but no EC during the early and late
1066
+ * (teardown) phases of the Ruby process
1067
+ */
1068
+ static int has_ec_p(void)
1069
+ {
1070
+ return ruby_thread_has_gvl_p && ruby_thread_has_gvl_p() &&
1071
+ ruby_current_vm_ptr && ruby_current_ec;
1072
+ }
1073
+
1074
+ static void mw_ruby_set_generation(size_t *gen, size_t size)
1075
+ {
1076
+ if (rb_gc_count) {
1077
+ uatomic_add_return(&total_bytes_inc, size);
1078
+ if (has_ec_p()) {
1079
+ *gen = rb_gc_count();
1080
+ uatomic_set(&last_gc_count, *gen);
1081
+ }
1082
+ } else {
1083
+ *gen = uatomic_add_return(&total_bytes_inc, size);
1084
+ }
1085
+ }
1086
+
1087
/*
 * Return the current Ruby source file and store its line in *lineno
 * (UINT_MAX when Ruby reports a negative line).  Returns NULL without
 * touching *lineno when there is no execution context.
 */
static const char *mw_ruby_src_file_cstr(unsigned *lineno)
{
	int line;
	const char *path;

	if (!has_ec_p())
		return NULL;

	path = rb_source_location_cstr(&line);
	if (line < 0)
		*lineno = UINT_MAX;
	else
		*lineno = (unsigned)line;
	return path;
}
1095
+ #endif /* MWRAP_RUBY */