mwrap 2.3.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1095 @@
1
+ /*
2
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
3
+ * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
4
+ * Disclaimer: I don't really know my way around XS or Perl internals well
5
+ */
6
+ #define _LGPL_SOURCE /* allows URCU to inline some stuff */
7
+ #define _GNU_SOURCE
8
+ #include "mymalloc.h" /* includes dlmalloc_c.h */
9
+ #ifndef MWRAP_PERL
10
+ # define MWRAP_PERL 0
11
+ #endif
12
+
13
+ #ifndef MWRAP_RUBY
14
+ # define MWRAP_RUBY 0
15
+ #endif
16
+
17
+ /* set a sensible max to avoid stack overflows */
18
+ #ifndef MWRAP_BT_MAX
19
+ # define MWRAP_BT_MAX 32
20
+ #endif
21
+
22
+ #ifndef _GNU_SOURCE
23
+ # define _GNU_SOURCE
24
+ #endif
25
+ #include <execinfo.h>
26
+ #include <stdio.h>
27
+ #include <stdlib.h>
28
+ #include <string.h>
29
+ #include <assert.h>
30
+ #include <errno.h>
31
+ #include <sys/types.h>
32
+ #include <sys/stat.h>
33
+ #include <fcntl.h>
34
+ #include <pthread.h>
35
+ #include <signal.h>
36
+ #include <urcu-bp.h>
37
+ #include <urcu/rculfhash.h>
38
+ #include <urcu/rculist.h>
39
+ #include <limits.h>
40
+
41
+ #if MWRAP_PERL
42
+ # include "EXTERN.h"
43
+ # include "perl.h"
44
+ # include "XSUB.h"
45
+ # include "embed.h"
46
+ # include "ppport.h"
47
+ #endif
48
+
49
+ /*
50
+ * XXH3 (truncated to 32-bits) seems to provide a ~2% speedup.
51
+ * XXH32 doesn't show improvements over jhash despite rculfhash
52
+ * only supporting 32-bit hash values.
53
+ */
54
+ #if defined(HAVE_XXHASH)
55
+ # define XXH_INLINE_ALL
56
+ # include <xxhash.h>
57
+ # if !defined(XXH3_64bits)
58
+ # warning XXH3_64bits not defined
59
+ # endif
60
+ #endif
61
+
62
+ #if !defined(XXH3_64bits)
63
+ # include "jhash.h"
64
+ #endif
65
+
66
+ #define U24_MAX (1U << 24)
67
+
68
+ /*
69
+ * Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
70
+ * Instead, the relative age of an object is the number of total bytes
71
+ * allocated (and we don't care about overflow on 32-bit since
72
+ * hardly anybody still uses it).
73
+ */
74
+ static size_t total_bytes_inc, total_bytes_dec, nr_file, nr_src_loc;
75
+ static uint32_t bt_req_depth;
76
+
77
+ #if MWRAP_PERL
78
+ extern pthread_key_t __attribute__((weak)) PL_thr_key;
79
+ extern const char __attribute__((weak)) PL_memory_wrap[]; /* needed for -O0 */
80
+ # if !defined(PERL_IMPLICIT_CONTEXT)
81
+ static size_t *root_locating; /* determines if PL_curcop is our thread */
82
+ # endif
83
+ #endif /* MWRAP_PERL */
84
+
85
+ #if MWRAP_RUBY
86
+ static void mw_ruby_set_generation(size_t *, size_t);
87
+ # define SET_GENERATION(gen, size) mw_ruby_set_generation(gen, size)
88
+ static size_t last_gc_count; /* for httpd which runs in a non-GVL thread */
89
+ #endif /* MWRAP_RUBY */
90
+
91
+ #ifndef SET_GENERATION /* C-only builds w/o Perl|Ruby */
92
+ # define SET_GENERATION(gen, size) \
93
+ *gen = uatomic_add_return(&total_bytes_inc, size)
94
+ #endif /* !SET_GENERATION */
95
+
96
+ /* generic stuff: */
97
+ static MWRAP_TSD size_t locating;
98
+ static struct cds_lfht *files, *totals;
99
+ union padded_mutex {
100
+ pthread_mutex_t mtx;
101
+ char pad[64]; /* cache alignment for common CPUs */
102
+ };
103
+
104
+ /* a pool of mutexes for all "struct src_loc" */
105
+ #define MUTEX_NR (1 << 6)
106
+ #define MUTEX_MASK (MUTEX_NR - 1)
107
+ static union padded_mutex mutexes[MUTEX_NR] = {
108
+ [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
109
+ };
110
+
111
+ #ifdef static_assert
112
+ /* we only use uint32_t for pathname storage for struct alignment */
113
+ static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX");
114
+ #endif
115
+
116
+ static struct cds_lfht *lfht_new(size_t size)
117
+ {
118
+ return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
119
+ }
120
+
121
+ static void reset_mutexes(void)
122
+ {
123
+ size_t i;
124
+
125
+ for (i = 0; i < MUTEX_NR; i++)
126
+ CHECK(int, 0, pthread_mutex_init(&mutexes[i].mtx, 0));
127
+ }
128
+
129
#ifndef HAVE_MEMPCPY
/* portable fallback for GNU mempcpy(3): copy @n bytes, return dest + n */
static void *my_mempcpy(void *dest, const void *src, size_t n)
{
	memcpy(dest, src, n);
	return (char *)dest + n;
}
#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
#endif
136
+
137
+ /* stolen from glibc: */
138
+ #define RETURN_ADDRESS(nr) \
139
+ __builtin_extract_return_addr(__builtin_return_address(nr))
140
+
141
+
142
+ #define SRC_LOC_BT(bt) union stk_bt bt; do { \
143
+ uint32_t depth = locating ? 1 : CMM_LOAD_SHARED(bt_req_depth); \
144
+ switch (depth) { \
145
+ case 0: \
146
+ case 1: bt.sl.bt_len = 1; bt.sl.bt[0] = RETURN_ADDRESS(0); break; \
147
+ default: /* skip 1st level of BT since thats our function */ \
148
+ mwrap_assert(depth <= MWRAP_BT_MAX); \
149
+ ++locating; \
150
+ long n = (long)backtrace(bt_dst(&bt), depth); \
151
+ --locating; \
152
+ bt.sl.bt_len = n <= 1 ? 0 : (uint32_t)n - 1; \
153
+ if (n > 1) mwrap_assert(bt.sl.bt[0] == RETURN_ADDRESS(0)); \
154
+ } \
155
+ } while (0)
156
+
157
+ /*
158
+ * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols* files
159
+ * Allocated via real_malloc / real_free
160
+ */
161
+ struct src_file {
162
+ struct cds_lfht_node nd; /* <=> files table */
163
+ uint32_t fn_hash;
164
+ uint32_t fn_len; /* < PATH_MAX */
165
+ char fn[]; /* NUL-terminated */
166
+ };
167
+
168
+ /* allocated via real_malloc, immortal for safety reasons */
169
+ struct src_loc {
170
+ size_t total;
171
+ size_t freed_bytes;
172
+ size_t allocations;
173
+ size_t frees;
174
+ size_t age_total; /* (age_total / frees) => mean age at free */
175
+ size_t max_lifespan;
176
+ struct cds_lfht_node hnode; /* <=> totals table */
177
+ struct cds_list_head allocs; /* <=> alloc_hdr.node */
178
+ uint32_t loc_hash;
179
+ uint8_t bt_len;
180
+ /* next 3 fields contiguous for hash_src_loc(): */
181
+ unsigned lineno:24; /* nobody should have >=16.7 LoC in one file */
182
+ struct src_file *f;
183
+ void *bt[];
184
+ } __attribute__((packed,aligned(8)));
185
+
186
+ /* sizeof() doesn't work on bitfields */
187
+ #define SIZEOF_LINENO (size_t)(24 / 8)
188
+
189
+ /*
190
+ * Every allocation has this in the header, maintain alignment with malloc
191
+ * Do not expose this to Perl code because of use-after-free concerns.
192
+ */
193
+ struct alloc_hdr {
194
+ struct cds_list_head anode; /* <=> src_loc.allocs */
195
+ union {
196
+ struct {
197
+ size_t gen; /* global age || rb_gc_count() */
198
+ struct src_loc *loc;
199
+ } live;
200
+ struct rcu_head dead;
201
+ } as;
202
+ void *real; /* what to call real_free on (exists for *memalign) */
203
+ size_t size;
204
+ };
205
+
206
+ /* on-stack structures */
207
+ union stk_sf {
208
+ struct src_file sf;
209
+ char buf_[sizeof(struct src_file) + PATH_MAX];
210
+ };
211
+
212
+ union stk_bt {
213
+ struct src_loc sl;
214
+ /* we subtract one level from MWRAP_BT_MAX since we discard one
215
+ * level of backtrace(3) (see below for why) */
216
+ char buf_[sizeof(struct src_loc) + sizeof(void *) * (MWRAP_BT_MAX-1)];
217
+ };
218
+
219
+ /*
220
+ * we discard the 1st-level of the backtrace(3) since it's our *alloc
221
+ * function (and therefore uninteresting), so we want backtrace(3) to
222
+ * write to bt->sl.bt[-1] so that bt->sl.bt[0] is the first interesting
223
+ * thing.
224
+ */
225
+ #ifdef static_assert
226
+ static_assert(offsetof(struct src_loc, f) + sizeof(void *) ==
227
+ offsetof(struct src_loc, bt),
228
+ "bt lineno is is bt[-1]");
229
+ #endif
230
+ static void **bt_dst(union stk_bt *bt)
231
+ {
232
+ return (void **)&bt->sl.f;
233
+ }
234
+
235
+ static struct alloc_hdr *ptr2hdr(void *p)
236
+ {
237
+ return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
238
+ }
239
+
240
+ static void *hdr2ptr(struct alloc_hdr *h)
241
+ {
242
+ return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
243
+ }
244
+
245
+ static int loc_is_addr(const struct src_loc *l)
246
+ {
247
+ return l->f == NULL;
248
+ }
249
+
250
+ static size_t bt_bytelen(const struct src_loc *l)
251
+ {
252
+ return sizeof(l->bt[0]) * l->bt_len;
253
+ }
254
+
255
+ static size_t src_loc_hash_len(const struct src_loc *l)
256
+ {
257
+ return sizeof(l->f) + SIZEOF_LINENO + bt_bytelen(l);
258
+ }
259
+
260
+ static void *src_loc_hash_tip(const struct src_loc *l)
261
+ {
262
+ return (void *)((uintptr_t)&l->bt_len + sizeof(l->bt_len));
263
+ }
264
+
265
+ static int loc_eq(struct cds_lfht_node *node, const void *key)
266
+ {
267
+ const struct src_loc *existing;
268
+ const struct src_loc *k = key;
269
+
270
+ existing = caa_container_of(node, struct src_loc, hnode);
271
+
272
+ return (k->bt_len == existing->bt_len &&
273
+ !memcmp(src_loc_hash_tip(k), src_loc_hash_tip(existing),
274
+ src_loc_hash_len(k)));
275
+ }
276
+
277
+ static int fn_eq(struct cds_lfht_node *node, const void *key)
278
+ {
279
+ const struct src_file *existing;
280
+ const struct src_file *k = key;
281
+
282
+ existing = caa_container_of(node, struct src_file, nd);
283
+
284
+ return (k->fn_len == existing->fn_len &&
285
+ !memcmp(k->fn, existing->fn, k->fn_len));
286
+ }
287
+
288
+ static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k)
289
+ {
290
+ struct cds_lfht_iter iter;
291
+ struct cds_lfht_node *cur;
292
+
293
+ mwrap_assert(rcu_read_ongoing());
294
+ cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter);
295
+ cur = cds_lfht_iter_get_node(&iter);
296
+ return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL;
297
+ }
298
+
299
+ static struct src_loc *totals_add_rcu(const struct src_loc *k)
300
+ {
301
+ struct src_loc *l;
302
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
303
+ if (!t) return NULL;
304
+
305
+ again:
306
+ l = src_loc_get(t, k);
307
+ if (l) {
308
+ uatomic_add(&l->total, k->total);
309
+ uatomic_inc(&l->allocations);
310
+ } else {
311
+ size_t n = bt_bytelen(k) + sizeof(*k);
312
+ struct cds_lfht_node *cur;
313
+
314
+ l = real_malloc(n);
315
+ if (!l) return l;
316
+ memcpy(l, k, n);
317
+ l->freed_bytes = 0;
318
+ l->age_total = 0;
319
+ l->max_lifespan = 0;
320
+ l->freed_bytes = 0;
321
+ l->frees = 0;
322
+ l->allocations = 1;
323
+ CDS_INIT_LIST_HEAD(&l->allocs);
324
+ cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode);
325
+ if (cur == &l->hnode) {
326
+ uatomic_inc(&nr_src_loc);
327
+ } else { /* lost race */
328
+ rcu_read_unlock();
329
+ real_free(l);
330
+ rcu_read_lock();
331
+ goto again;
332
+ }
333
+ }
334
+ return l;
335
+ }
336
+
337
+ static uint32_t do_hash(const void *p, size_t len)
338
+ {
339
+ #if defined(XXH3_64bits)
340
+ union {
341
+ XXH64_hash_t u64;
342
+ uint32_t u32[2];
343
+ } u;
344
+ u.u64 = XXH3_64bits(p, len);
345
+ return u.u32[1];
346
+ #else
347
+ return jhash(p, len, 0xdeadbeef);
348
+ #endif
349
+ }
350
+
351
+ static void hash_src_loc(struct src_loc *l)
352
+ {
353
+ l->loc_hash = do_hash(src_loc_hash_tip(l), src_loc_hash_len(l));
354
+ }
355
+
356
+ static struct src_file *src_file_get(struct cds_lfht *t, struct src_file *k,
357
+ const char *fn, size_t fn_len)
358
+ {
359
+ struct cds_lfht_iter iter;
360
+ struct cds_lfht_node *cur;
361
+
362
+ mwrap_assert(t); /* caller should've bailed if missing */
363
+ if (fn_len >= PATH_MAX)
364
+ return NULL;
365
+ k->fn_len = (uint32_t)fn_len;
366
+ memcpy(k->fn, fn, fn_len);
367
+ k->fn[fn_len] = 0;
368
+ k->fn_hash = do_hash(k->fn, fn_len);
369
+ mwrap_assert(rcu_read_ongoing());
370
+ cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter);
371
+ cur = cds_lfht_iter_get_node(&iter);
372
+
373
+ return cur ? caa_container_of(cur, struct src_file, nd) : NULL;
374
+ }
375
+
376
+ #if MWRAP_PERL
377
+ static const COP *mwp_curcop(void)
378
+ {
379
+ if (&PL_thr_key) { /* are we even in a Perl process? */
380
+ # ifdef PERL_IMPLICIT_CONTEXT
381
+ if (aTHX) return PL_curcop;
382
+ # else /* !PERL_IMPLICIT_CONTEXT */
383
+ if (&locating == root_locating) return PL_curcop;
384
+ # endif /* PERL_IMPLICIT_CONTEXT */
385
+ }
386
+ return NULL;
387
+ }
388
+
389
+ static const char *mw_perl_src_file_cstr(unsigned *lineno)
390
+ {
391
+ const COP *cop = mwp_curcop();
392
+ if (!cop) return NULL;
393
+ const char *fn = CopFILE(cop);
394
+ if (!fn) return NULL;
395
+ *lineno = CopLINE(cop);
396
+ return fn;
397
+ }
398
+ # define SRC_FILE_CSTR(lineno) mw_perl_src_file_cstr(lineno)
399
+ #endif /* MWRAP_PERL */
400
+
401
+ #if MWRAP_RUBY
402
+ static const char *mw_ruby_src_file_cstr(unsigned *lineno);
403
+ # define SRC_FILE_CSTR(lineno) mw_ruby_src_file_cstr(lineno)
404
+ #endif /* MWRAP_RUBY */
405
+
406
+ #ifndef SRC_FILE_CSTR /* for C-only compilation */
407
+ # define SRC_FILE_CSTR(lineno) (NULL)
408
+ #endif /* !SRC_FILE_CSTR */
409
+
410
+ static struct src_loc *assign_line(size_t size, struct src_loc *sl,
411
+ const char *fn, unsigned lineno)
412
+ {
413
+ struct src_file *f;
414
+ union stk_sf sf;
415
+ struct cds_lfht_node *cur;
416
+ struct cds_lfht *t = CMM_LOAD_SHARED(files);
417
+
418
+ mwrap_assert(t);
419
+
420
+ size_t len = strlen(fn);
421
+ if (len >= PATH_MAX)
422
+ len = PATH_MAX - 1;
423
+
424
+ if (lineno == UINT_MAX) { /* NOLINE in Perl is UINT_MAX */
425
+ lineno = U24_MAX;
426
+ } else if (lineno > U24_MAX) {
427
+ fprintf(stderr,
428
+ "%s:%u line number exceeds limit (%u), capped\n",
429
+ fn, lineno, U24_MAX);
430
+ lineno = U24_MAX;
431
+ }
432
+ again:
433
+ f = src_file_get(t, &sf.sf, fn, len);
434
+ if (!f) { /* doesn't exist, add a new one */
435
+ f = real_malloc(sizeof(*f) + len + 1);
436
+ if (!f) return NULL;
437
+ memcpy(f, &sf.sf, sizeof(*f) + len + 1);
438
+ cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd);
439
+ if (cur == &f->nd) {
440
+ uatomic_inc(&nr_file);
441
+ } else { /* lost race */
442
+ rcu_read_unlock();
443
+ real_free(f);
444
+ rcu_read_lock();
445
+ goto again;
446
+ }
447
+ }
448
+
449
+ sl->total = size;
450
+ sl->f = f;
451
+ sl->lineno = lineno;
452
+ if (f && !bt_req_depth)
453
+ sl->bt_len = 0;
454
+ hash_src_loc(sl);
455
+ return totals_add_rcu(sl);
456
+ }
457
+
458
+ static struct src_loc *
459
+ update_stats_rcu_lock(size_t *gen, size_t size, struct src_loc *sl)
460
+ {
461
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
462
+ struct src_loc *ret = NULL;
463
+
464
+ if (caa_unlikely(!t)) return 0; /* not initialized */
465
+ if (locating++) goto out; /* do not recurse into another *alloc */
466
+
467
+ SET_GENERATION(gen, size);
468
+
469
+ unsigned lineno;
470
+ const char *fn = SRC_FILE_CSTR(&lineno);
471
+
472
+ rcu_read_lock();
473
+ if (fn)
474
+ ret = assign_line(size, sl, fn, lineno);
475
+ if (!ret) { /* no associated Perl|Ruby code, just C/C++ */
476
+ sl->total = size;
477
+ sl->f = NULL;
478
+ sl->lineno = 0;
479
+ hash_src_loc(sl);
480
+ ret = totals_add_rcu(sl);
481
+ }
482
+ out:
483
+ --locating;
484
+ return ret;
485
+ }
486
+
487
+ size_t malloc_usable_size(void *p)
488
+ {
489
+ return ptr2hdr(p)->size;
490
+ }
491
+
492
+ static void free_hdr_rcu(struct rcu_head *dead)
493
+ {
494
+ struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
495
+ real_free(h->real);
496
+ }
497
+
498
+ static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l)
499
+ {
500
+ pthread_mutex_t *mtx = &mutexes[l->loc_hash & MUTEX_MASK].mtx;
501
+ CHECK(int, 0, pthread_mutex_lock(mtx));
502
+ return mtx;
503
+ }
504
+
505
/*
 * Replacement for free(3): updates per-location statistics before
 * deferring the real release to an RCU grace period.
 * free(NULL) is a no-op, as required by the C standard.
 */
void free(void *p)
{
	if (p) {
		struct alloc_hdr *h = ptr2hdr(p);
		struct src_loc *l = h->as.live.loc;

		if (l) {
			/* age = bytes allocated globally since this object was born
			 * (or GC counts under MWRAP_RUBY — see SET_GENERATION) */
			size_t current_bytes = uatomic_read(&total_bytes_inc);
			size_t age = current_bytes - h->as.live.gen;
			/* consume h->size for both counters BEFORE zeroing it */
			uatomic_add(&total_bytes_dec, h->size);
			uatomic_add(&l->freed_bytes, h->size);
			uatomic_set(&h->size, 0);
			uatomic_inc(&l->frees);
			uatomic_add(&l->age_total, age);

			/* unlink from the live-allocation list under the pool mutex;
			 * max_lifespan is also mutex-protected (not atomic) */
			pthread_mutex_t *mtx = src_loc_mutex_lock(l);
			cds_list_del_rcu(&h->anode);
			if (age > l->max_lifespan)
				l->max_lifespan = age;
			CHECK(int, 0, pthread_mutex_unlock(mtx));

			/* defer real_free until concurrent RCU readers are done */
			call_rcu(&h->as.dead, free_hdr_rcu);
		} else {
			/* untracked allocation (stats were uninitialized) */
			real_free(h->real);
		}
	}
}
532
+
533
/*
 * Finish a successful allocation: fill in the header @h (tracking @size
 * user bytes; @real is what real_free will eventually be called on),
 * account it via update_stats_rcu_lock, and link it into the location's
 * live-allocation list.
 *
 * NOTE: update_stats_rcu_lock returns with rcu_read_lock held when (and
 * only when) it returns non-NULL, which is why rcu_read_unlock appears
 * only inside the `if (l)' branch here.
 */
static void
alloc_insert_rcu(struct src_loc *sl, struct alloc_hdr *h, size_t size,
		void *real)
{
	h->size = size;
	h->real = real;
	size_t gen = 0;
	struct src_loc *l = update_stats_rcu_lock(&gen, size, sl);
	h->as.live.loc = l;
	h->as.live.gen = gen;
	if (l) {
		/* list insertion shares the mutex pool with free() */
		pthread_mutex_t *mtx = src_loc_mutex_lock(l);
		cds_list_add_rcu(&h->anode, &l->allocs);
		CHECK(int, 0, pthread_mutex_unlock(mtx));
		rcu_read_unlock();
	}
}
550
+
551
/* true if @ptr sits on an @alignment boundary (alignment: power of two) */
static bool ptr_is_aligned(void *ptr, size_t alignment)
{
	uintptr_t addr = (uintptr_t)ptr;

	return (addr & (alignment - 1)) == 0;
}
555
+
556
/* round @ptr up to the next @alignment boundary (alignment: power of two) */
static void *ptr_align(void *ptr, size_t alignment)
{
	uintptr_t mask = alignment - 1;

	return (void *)(((uintptr_t)ptr + mask) & ~mask);
}
560
+
561
/*
 * True iff @n is a power of two.  The previous revision returned true
 * for n == 0 (its only caller, mwrap_memalign, screened 0 separately);
 * reject 0 here so the predicate is honest on its own.
 */
static bool is_power_of_two(size_t n)
{
	return n != 0 && (n & (n - 1)) == 0;
}
565
+
566
/*
 * Common implementation for memalign/posix_memalign/valloc/pvalloc/
 * aligned_alloc.  On success stores the aligned pointer in *pp and
 * returns 0; otherwise returns EINVAL or ENOMEM without touching *pp
 * (posix_memalign(3) error convention — errno is NOT set here, see
 * memalign_result()).
 */
static int
mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
{
	void *real;
	size_t asize;
	size_t d = alignment / sizeof(void*);
	size_t r = alignment % sizeof(void*);

	/* POSIX: alignment must be a power-of-two multiple of sizeof(void *) */
	if (r != 0 || d == 0 || !is_power_of_two(d))
		return EINVAL;

	/* small alignments are already satisfied by our malloc replacement */
	if (alignment <= MALLOC_ALIGNMENT) {
		void *p = malloc(size);
		if (!p) return ENOMEM;
		*pp = p;
		return 0;
	}
	for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
		; /* double alignment until >= sizeof(struct alloc_hdr) */
	/* over-allocate: alignment slack + our tracking header */
	if (__builtin_add_overflow(size, alignment, &asize) ||
	    __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
		return ENOMEM;

	real = real_malloc(asize);
	if (real) {
		/* place the user pointer on an alignment boundary past the
		 * header; ptr2hdr(p) then yields a header fully inside the
		 * region (alignment >= sizeof(struct alloc_hdr) above) */
		void *p = hdr2ptr(real);
		if (!ptr_is_aligned(p, alignment))
			p = ptr_align(p, alignment);
		struct alloc_hdr *h = ptr2hdr(p);
		alloc_insert_rcu(sl, h, size, real);
		*pp = p;
	}

	return real ? 0 : ENOMEM;
}
601
+
602
/*
 * Adapt mwrap_memalign's errno-style return code for callers with the
 * malloc-style contract (memalign/valloc/pvalloc): set errno on failure
 * and pass the pointer through unchanged.
 */
static void *memalign_result(int err, void *p)
{
	if (err)
		errno = err;
	return p;
}
608
+
609
+ void *memalign(size_t alignment, size_t size)
610
+ {
611
+ void *p = NULL;
612
+ SRC_LOC_BT(bt);
613
+ int err = mwrap_memalign(&p, alignment, size, &bt.sl);
614
+ return memalign_result(err, p);
615
+ }
616
+
617
+ int posix_memalign(void **p, size_t alignment, size_t size)
618
+ {
619
+ SRC_LOC_BT(bt);
620
+ return mwrap_memalign(p, alignment, size, &bt.sl);
621
+ }
622
+
623
+ /* these aliases aren't needed for glibc, not sure about other libcs... */
624
+ void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
625
+ void cfree(void *) __attribute__((__nothrow__))
626
+ __attribute__((__leaf__)) __attribute__((alias("free")));
627
+
628
+ void *valloc(size_t size)
629
+ {
630
+ ensure_initialization();
631
+ SRC_LOC_BT(bt);
632
+ void *p = NULL;
633
+ int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
634
+ return memalign_result(err, p);
635
+ }
636
+
637
+ #if __GNUC__ < 7
638
+ # define add_overflow_p(a,b) __extension__({ \
639
+ __typeof__(a) _c; \
640
+ __builtin_add_overflow(a,b,&_c); \
641
+ })
642
+ #else
643
+ # define add_overflow_p(a,b) \
644
+ __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
645
+ #endif
646
+
647
/* round @size up to a multiple of @alignment (alignment: power of two) */
static size_t size_align(size_t size, size_t alignment)
{
	size_t mask = alignment - 1;

	return (size + mask) & ~mask;
}
651
+
652
+ void *pvalloc(size_t size)
653
+ {
654
+ void *p = NULL;
655
+
656
+ ensure_initialization();
657
+
658
+ if (add_overflow_p(size, mparams.page_size)) {
659
+ errno = ENOMEM;
660
+ return 0;
661
+ }
662
+ size = size_align(size, mparams.page_size);
663
+ SRC_LOC_BT(bt);
664
+ int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl);
665
+ return memalign_result(err, p);
666
+ }
667
+
668
/*
 * Replacement for malloc(3): prepends a tracking header to every
 * allocation and records the call site.  Returns NULL with errno set to
 * ENOMEM on failure (including overflow of size + header).
 */
void *malloc(size_t size)
{
	size_t asize;

	/* guard size + sizeof(header) against overflow */
	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
		goto enomem;

	void *p = real_malloc(asize);
	if (p) {
		SRC_LOC_BT(bt); /* capture caller location/backtrace */
		struct alloc_hdr *h = p;
		alloc_insert_rcu(&bt.sl, h, size, h);
		return hdr2ptr(h); /* user data starts after the header */
	}
enomem:
	errno = ENOMEM;
	return 0;
}
686
+
687
/*
 * Replacement for calloc(3): like malloc above, plus zero-fill, with
 * the nmemb*size multiplication checked for overflow as required.
 */
void *calloc(size_t nmemb, size_t size)
{
	size_t asize;

	if (__builtin_mul_overflow(size, nmemb, &size))
		goto enomem;
	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
		goto enomem;
	void *p = real_malloc(asize);
	if (p) {
		struct alloc_hdr *h = p;
		SRC_LOC_BT(bt); /* capture caller location/backtrace */
		alloc_insert_rcu(&bt.sl, h, size, h);
		return memset(hdr2ptr(h), 0, size); /* zero only user bytes */
	}
enomem:
	errno = ENOMEM;
	return 0;
}
706
+
707
/*
 * Replacement for realloc(3).  Always allocates fresh storage, copies
 * min(old size, new size) bytes, then frees the old block — there is no
 * in-place resize, so every realloc shows up as an alloc+free pair in
 * the statistics.  realloc(ptr, 0) behaves as free(ptr).
 */
void *realloc(void *ptr, size_t size)
{
	size_t asize;

	if (!size) {
		free(ptr);
		return 0;
	}
	if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
		goto enomem;
	void *p = real_malloc(asize);
	if (p) {
		struct alloc_hdr *h = p;
		SRC_LOC_BT(bt); /* capture caller location/backtrace */
		alloc_insert_rcu(&bt.sl, h, size, h);
		p = hdr2ptr(h);
		if (ptr) {
			/* old->size is our recorded user size, not usable size */
			struct alloc_hdr *old = ptr2hdr(ptr);
			memcpy(p, ptr, old->size < size ? old->size : size);
			free(ptr);
		}
		return p;
	}
	/* n.b. on failure the original block is left untouched, per spec */
enomem:
	errno = ENOMEM;
	return 0;
}
734
+
735
+ struct dump_arg {
736
+ FILE *fp;
737
+ size_t min;
738
+ };
739
+
740
+ char **bt_syms(void * const *addrlist, uint32_t size)
741
+ {
742
+ mwrap_assert(size < INT_MAX);
743
+ #if defined(__GLIBC__)
744
+ char **s = backtrace_symbols(addrlist, size);
745
+ #else /* make FreeBSD look like glibc output: */
746
+ char **s = backtrace_symbols_fmt(addrlist, size, "%f(%n%D) [%a]");
747
+ #endif
748
+ if (!s) fprintf(stderr, "backtrace_symbols: %m\n");
749
+ return s;
750
+ }
751
+
752
/* supported by modern gcc + clang */
#define AUTO_FREE __attribute__((__cleanup__(cleanup_free)))
/* __cleanup__ callback: free whatever the annotated pointer variable owns */
static void cleanup_free(void *any)
{
	void **pp = any;

	free(*pp);
}
759
+
760
+ static void *dump_to_file(struct dump_arg *a)
761
+ {
762
+ struct cds_lfht_iter iter;
763
+ struct src_loc *l;
764
+ struct cds_lfht *t;
765
+
766
+ ++locating;
767
+ rcu_read_lock();
768
+ t = CMM_LOAD_SHARED(totals);
769
+ if (!t)
770
+ goto out_unlock;
771
+
772
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
773
+ if (l->total <= a->min) continue;
774
+
775
+ if (loc_is_addr(l)) {
776
+ AUTO_FREE char **s = bt_syms(l->bt, 1);
777
+
778
+ if (s)
779
+ fprintf(a->fp, "%16zu %12zu %s\n",
780
+ l->total, l->allocations, s[0]);
781
+ } else {
782
+ fprintf(a->fp, "%16zu %12zu %s:%u\n",
783
+ l->total, l->allocations, l->f->fn, l->lineno);
784
+ }
785
+ }
786
+ out_unlock:
787
+ rcu_read_unlock();
788
+ --locating;
789
+ return 0;
790
+ }
791
+
792
+ /* str = "/path/to/foo.so(+0x123) [0xdeadbeefcafe]" (see bt_syms()) */
793
/*
 * Parse the trailing "[0x...]" address from a backtrace_symbols-style
 * line into *p.  Returns 1 on success, 0 if no address is present.
 * (Reverse scan is equivalent to memrchr(str, '[', len).)
 */
static int extract_addr(const char *str, size_t len, void **p)
{
	const char *end = str + len;
	const char *c = NULL;
	size_t i = len;

	while (i--) {
		if (str[i] == '[') {
			c = str + i;
			break;
		}
	}
	if (c && (c + 2) < end && c[1] == '0' && c[2] == 'x') {
		char *e;

		errno = 0;
		unsigned long x = strtoul(c + 3, &e, 16);
		if (!errno && *e == ']') {
			*p = (void *)x;
			return 1;
		}
	}
	return 0;
}
810
+
811
+ /* str is $PATHNAME:$LINENO, len is strlen(str) */
812
+ static struct src_loc *src_loc_lookup(const char *str, size_t len)
813
+ {
814
+ char *c = memrchr(str, ':', len);
815
+ const char *end = str + len;
816
+ unsigned lineno;
817
+ struct src_loc *l = NULL;
818
+ struct cds_lfht *t = CMM_LOAD_SHARED(files);
819
+ union stk_sf sf;
820
+
821
+ if (!c || c == end || !t)
822
+ return NULL;
823
+
824
+ size_t fn_len = c - str;
825
+ c++;
826
+ if (*c == '-') {
827
+ lineno = U24_MAX;
828
+ } else {
829
+ lineno = 0;
830
+ for (; c < end; c++) {
831
+ if (*c < '0' || *c > '9')
832
+ return NULL;
833
+ lineno *= 10;
834
+ lineno += (*c - '0');
835
+ }
836
+ if (lineno > U24_MAX)
837
+ return NULL;
838
+ }
839
+ rcu_read_lock();
840
+ struct src_file *f = src_file_get(t, &sf.sf, str, fn_len);
841
+ t = CMM_LOAD_SHARED(totals);
842
+ if (f && t) {
843
+ struct src_loc k;
844
+
845
+ k.f = f;
846
+ k.lineno = lineno;
847
+ k.bt_len = 0;
848
+ hash_src_loc(&k);
849
+ l = src_loc_get(t, &k);
850
+ }
851
+ rcu_read_unlock();
852
+ return l;
853
+ }
854
+
855
+ #ifndef O_CLOEXEC
856
+ # define O_CLOEXEC 0
857
+ #endif
858
+ static void h1d_atexit(void);
859
/*
 * Process destructor: parse the MWRAP environment variable and dump
 * per-location totals on exit.  Recognized options (comma-separated):
 *   dump_path:PATH  append the dump to PATH
 *   dump_fd:FD      write the dump to an already-open descriptor
 *   dump_min:N      skip locations with <= N total bytes
 */
__attribute__ ((destructor)) static void mwrap_dtor(void)
{
	const char *opt = getenv("MWRAP");
	const char *modes[] = { "a", "a+", "w", "w+", "r+" };
	struct dump_arg a = { .min = 0 };
	size_t i;
	int dump_fd;
	char *dump_path;
	char *s;

	/* n.b. unsetenv("MWRAP") may be called, so run this unconditionally */
	h1d_atexit();

	if (!opt)
		return;

	++locating; /* suppress our own allocation tracking during dump */
	/* sizeof("dump_path") == strlen("dump_path:"), so += skips the prefix */
	if ((dump_path = strstr(opt, "dump_path:")) &&
			(dump_path += sizeof("dump_path")) &&
			*dump_path) {
		/* copy up to the next ',' so later options aren't in the path */
		char *end = strchr(dump_path, ',');
		char buf[PATH_MAX];
		if (end) {
			mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf));
			end = mempcpy(buf, dump_path, end - dump_path);
			*end = 0;
			dump_path = buf;
		}
		dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
				0666);
		if (dump_fd < 0) {
			fprintf(stderr, "open %s failed: %m\n", dump_path);
			goto out;
		}
	}
	else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
		goto out; /* neither dump_path nor dump_fd given */

	if ((s = strstr(opt, "dump_min:")))
		sscanf(s, "dump_min:%zu", &a.min);

	switch (dump_fd) {
	case 0: goto out; /* fd 0 (stdin) makes no sense as a dump target */
	case 1: a.fp = stdout; break;
	case 2: a.fp = stderr; break;
	default:
		if (dump_fd < 0)
			goto out;
		a.fp = 0;

		/* try successively laxer stdio modes until one works */
		for (i = 0; !a.fp && i < 5; i++)
			a.fp = fdopen(dump_fd, modes[i]);

		if (!a.fp) {
			fprintf(stderr, "failed to open fd=%d: %m\n", dump_fd);
			goto out;
		}
		/* we'll leak some memory here, but this is a destructor */
	}
	dump_to_file(&a);
out:
	--locating;
}
922
+
923
+ static void mwrap_reset(void)
924
+ {
925
+ struct cds_lfht *t;
926
+ struct cds_lfht_iter iter;
927
+ struct src_loc *l;
928
+
929
+ uatomic_set(&total_bytes_inc, 0);
930
+ uatomic_set(&total_bytes_dec, 0);
931
+
932
+ rcu_read_lock();
933
+ t = CMM_LOAD_SHARED(totals);
934
+ if (t)
935
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
936
+ uatomic_set(&l->total, 0);
937
+ uatomic_set(&l->allocations, 0);
938
+ uatomic_set(&l->frees, 0);
939
+ uatomic_set(&l->freed_bytes, 0);
940
+ uatomic_set(&l->age_total, 0);
941
+ uatomic_set(&l->max_lifespan, 0);
942
+ }
943
+ rcu_read_unlock();
944
+ }
945
+
946
+ static inline struct src_loc *mwrap_get(const char *str, size_t len)
947
+ {
948
+ void *p;
949
+
950
+ if (!extract_addr(str, len, &p))
951
+ return src_loc_lookup(str, len);
952
+
953
+ union stk_bt k;
954
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
955
+
956
+ if (!t) return NULL;
957
+ k.sl.f = NULL;
958
+ k.sl.lineno = 0;
959
+ k.sl.bt[0] = p;
960
+ k.sl.bt_len = 1;
961
+ hash_src_loc(&k.sl);
962
+ rcu_read_lock();
963
+ struct src_loc *l = src_loc_get(t, &k.sl);
964
+ rcu_read_unlock();
965
+ return l;
966
+ }
967
+
968
+ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
969
+ {
970
+ static const size_t min_len = sizeof(struct src_file *) + SIZEOF_LINENO;
971
+
972
+ if (len >= min_len && ((len - min_len) % sizeof(void *)) == 0) {
973
+ struct cds_lfht *t = CMM_LOAD_SHARED(totals);
974
+ if (!t) return NULL;
975
+
976
+ union stk_bt k;
977
+ size_t bt_len = (len - min_len) / sizeof(void *);
978
+
979
+ if (bt_len > MWRAP_BT_MAX)
980
+ return NULL;
981
+ k.sl.bt_len = bt_len;
982
+
983
+ memcpy(src_loc_hash_tip(&k.sl), buf, len);
984
+ hash_src_loc(&k.sl);
985
+ rcu_read_lock();
986
+ struct src_loc *l = src_loc_get(t, &k.sl);
987
+ rcu_read_unlock();
988
+ return l;
989
+ }
990
+ return NULL;
991
+ }
992
+
993
+ static const char *mwrap_env;
994
+ #include "httpd.h"
995
+
996
+ __attribute__((constructor)) static void mwrap_ctor(void)
997
+ {
998
+ sigset_t set, old;
999
+ struct alloc_hdr *h;
1000
+ mwrap_env = getenv("MWRAP");
1001
+
1002
+ ++locating;
1003
+
1004
+ /* block signals */
1005
+ CHECK(int, 0, sigfillset(&set));
1006
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old));
1007
+ ensure_initialization();
1008
+ CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
1009
+
1010
+ /* initialize mutexes used by urcu-bp */
1011
+ CMM_STORE_SHARED(files, lfht_new(256));
1012
+ if (!CMM_LOAD_SHARED(files))
1013
+ fprintf(stderr, "failed to allocate files table\n");
1014
+ CMM_STORE_SHARED(totals, lfht_new(16384));
1015
+ if (!CMM_LOAD_SHARED(totals))
1016
+ fprintf(stderr, "failed to allocate totals table\n");
1017
+ h = real_malloc(sizeof(struct alloc_hdr));
1018
+ if (h) { /* force call_rcu to start background thread */
1019
+ h->real = h;
1020
+ call_rcu(&h->as.dead, free_hdr_rcu);
1021
+ } else
1022
+ fprintf(stderr, "malloc: %m\n");
1023
+
1024
+ h1d_start();
1025
+ CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
1026
+ CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
1027
+ atfork_child));
1028
+
1029
+ if (mwrap_env) {
1030
+ const char *bt = strstr(mwrap_env, "bt:");
1031
+ if (bt) {
1032
+ bt += sizeof("bt");
1033
+ errno = 0;
1034
+ char *end;
1035
+ unsigned long n = strtoul(bt, &end, 10);
1036
+ if (n && !errno && (*end == ',' || *end == 0)) {
1037
+ if (n > MWRAP_BT_MAX)
1038
+ n = MWRAP_BT_MAX;
1039
+ CMM_STORE_SHARED(bt_req_depth, (uint32_t)n);
1040
+ }
1041
+ }
1042
+ }
1043
+ --locating;
1044
+ }
1045
+
1046
+ #if MWRAP_RUBY
1047
+ # undef _GNU_SOURCE /* ruby.h redefines it */
1048
+ # include <ruby.h> /* defines HAVE_RUBY_RACTOR_H on 3.0+ */
1049
+ # include <ruby/thread.h>
1050
+ # include <ruby/io.h>
1051
+ # ifdef HAVE_RUBY_RACTOR_H /* Ruby 3.0+ */
1052
+ extern MWRAP_TSD void * __attribute__((weak)) ruby_current_ec;
1053
+ # else /* Ruby 2.6-2.7 */
1054
+ extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
1055
+ # define ruby_current_ec ruby_current_execution_context_ptr
1056
+ # endif /* HAVE_RUBY_RACTOR_H */
1057
+
1058
+ extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
1059
+ extern size_t __attribute__((weak)) rb_gc_count(void);
1060
+ int __attribute__((weak)) ruby_thread_has_gvl_p(void);
1061
+
1062
+ const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev or later */
1063
+ /*
1064
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
1065
+ * to have a native thread but no EC during the early and late
1066
+ * (teardown) phases of the Ruby process
1067
+ */
1068
+ static int has_ec_p(void)
1069
+ {
1070
+ return ruby_thread_has_gvl_p && ruby_thread_has_gvl_p() &&
1071
+ ruby_current_vm_ptr && ruby_current_ec;
1072
+ }
1073
+
1074
+ static void mw_ruby_set_generation(size_t *gen, size_t size)
1075
+ {
1076
+ if (rb_gc_count) {
1077
+ uatomic_add_return(&total_bytes_inc, size);
1078
+ if (has_ec_p()) {
1079
+ *gen = rb_gc_count();
1080
+ uatomic_set(&last_gc_count, *gen);
1081
+ }
1082
+ } else {
1083
+ *gen = uatomic_add_return(&total_bytes_inc, size);
1084
+ }
1085
+ }
1086
+
1087
+ static const char *mw_ruby_src_file_cstr(unsigned *lineno)
1088
+ {
1089
+ if (!has_ec_p()) return NULL;
1090
+ int line;
1091
+ const char *fn = rb_source_location_cstr(&line);
1092
+ *lineno = line < 0 ? UINT_MAX : (unsigned)line;
1093
+ return fn;
1094
+ }
1095
+ #endif /* !MWRAP_RUBY */