pf2 0.12.0 → 0.13.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
data/ext/pf2/khashl.h ADDED
@@ -0,0 +1,506 @@
+ /* The MIT License
+
+ Copyright (c) 2019- by Attractive Chaos <attractor@live.co.uk>
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ */
+
+ #ifndef __AC_KHASHL_H
+ #define __AC_KHASHL_H
+
+ #define AC_VERSION_KHASHL_H "r40"
+
+ #include <stdlib.h>
+ #include <string.h>
+ #include <limits.h>
+
+ /************************************
+ * Compiler specific configurations *
+ ************************************/
+
+ #if UINT_MAX == 0xffffffffu
+ typedef unsigned int khint32_t;
+ #elif ULONG_MAX == 0xffffffffu
+ typedef unsigned long khint32_t;
+ #endif
+
+ #if ULONG_MAX == ULLONG_MAX
+ typedef unsigned long khint64_t;
+ #else
+ typedef unsigned long long khint64_t;
+ #endif
+
+ #ifndef kh_inline
+ #ifdef _MSC_VER
+ #define kh_inline __inline
+ #else
+ #define kh_inline inline
+ #endif
+ #endif /* kh_inline */
+
+ #ifndef klib_unused
+ #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+ #define klib_unused __attribute__ ((__unused__))
+ #else
+ #define klib_unused
+ #endif
+ #endif /* klib_unused */
+
+ #define KH_LOCAL static kh_inline klib_unused
+
+ typedef khint32_t khint_t;
+ typedef const char *kh_cstr_t;
+
+ /***********************
+ * Configurable macros *
+ ***********************/
+
+ #ifndef kh_max_count /* set the max load factor */
+ #define kh_max_count(cap) (((cap)>>1) + ((cap)>>2)) /* default load factor: 75% */
+ #endif
+
+ #ifndef kh_packed /* pack the key-value struct */
+ #define kh_packed __attribute__ ((__packed__))
+ #endif
+
+ #if !defined(Kmalloc) || !defined(Kcalloc) || !defined(Krealloc) || !defined(Kfree)
+ #define Kmalloc(km, type, cnt) ((type*)malloc((cnt) * sizeof(type)))
+ #define Kcalloc(km, type, cnt) ((type*)calloc((cnt), sizeof(type)))
+ #define Krealloc(km, type, ptr, cnt) ((type*)realloc((ptr), (cnt) * sizeof(type)))
+ #define Kfree(km, ptr) free(ptr)
+ #endif
+
+ /****************************
+ * Simple private functions *
+ ****************************/
+
+ #define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U)
+ #define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU))
+ #define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU)))
+
+ #define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
+
+ static kh_inline khint_t __kh_splitmix32(khint_t *x) { khint_t z = (*x += 0x9e3779b9U); z = (z ^ (z >> 16)) * 0x21f0aaadU; z = (z ^ (z >> 15)) * 0x735a2d97U; return z ^ (z >> 15); }
+ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t salt, khint_t bits) { return (hash ^ salt) * 2654435769U >> (32 - bits); } /* Fibonacci hashing */
+
+ /*******************
+ * Hash table base *
+ *******************/
+
+ #define __KHASHL_TYPE(HType, khkey_t) \
+ typedef struct HType { \
+ void *km; \
+ unsigned short bits, salt; \
+ khint_t count; \
+ khint32_t *used; \
+ khkey_t *keys; \
+ } HType;
+
+ #define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \
+ extern HType *prefix##_init(void); \
+ extern HType *prefix##_init2(void *km); \
+ extern void prefix##_destroy(HType *h); \
+ extern void prefix##_clear(HType *h); \
+ extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \
+ extern int prefix##_resize(HType *h, khint_t new_n_buckets); \
+ extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \
+ extern void prefix##_del(HType *h, khint_t k);
+
+ #define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+ SCOPE HType *prefix##_init3(void *km, khint_t seed) { \
+ HType *h = Kcalloc(km, HType, 1); \
+ h->km = km; \
+ if (seed != 0) h->salt = __kh_splitmix32(&seed); \
+ return h; \
+ } \
+ SCOPE HType *prefix##_init2(void *km) { return prefix##_init3(km, 0); } \
+ SCOPE HType *prefix##_init(void) { return prefix##_init2(0); } \
+ SCOPE void prefix##_destroy(HType *h) { \
+ if (!h) return; \
+ Kfree(h->km, (void*)h->keys); Kfree(h->km, h->used); \
+ Kfree(h->km, h); \
+ } \
+ SCOPE void prefix##_clear(HType *h) { \
+ if (h && h->used) { \
+ khint_t n_buckets = (khint_t)1U << h->bits; \
+ memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \
+ h->count = 0; \
+ } \
+ }
+
+ #define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \
+ khint_t i, last, n_buckets, mask; \
+ if (h->keys == 0) return 0; \
+ n_buckets = (khint_t)1U << h->bits; \
+ mask = n_buckets - 1U; \
+ i = last = __kh_h2b(hash, h->salt, h->bits); \
+ while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+ i = (i + 1U) & mask; \
+ if (i == last) return n_buckets; \
+ } \
+ return !__kh_used(h->used, i)? n_buckets : i; \
+ } \
+ SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); }
+
+ #define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \
+ khint32_t *new_used = 0; \
+ khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
+ while ((x >>= 1) != 0) ++j; \
+ if (new_n_buckets & (new_n_buckets - 1)) ++j; \
+ new_bits = j > 2? j : 2; \
+ if (new_bits == h->bits) return 0; /* same size; no need to rehash */ \
+ new_n_buckets = (khint_t)1U << new_bits; \
+ if (h->count > kh_max_count(new_n_buckets)) return 0; /* requested size is too small */ \
+ new_used = Kmalloc(h->km, khint32_t, __kh_fsize(new_n_buckets)); \
+ if (!new_used) return -1; /* not enough memory */ \
+ memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \
+ n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+ if (n_buckets < new_n_buckets) { /* expand */ \
+ khkey_t *new_keys = Krealloc(h->km, khkey_t, h->keys, new_n_buckets); \
+ if (!new_keys) { Kfree(h->km, new_used); return -1; } \
+ h->keys = new_keys; \
+ } \
+ new_mask = new_n_buckets - 1; \
+ for (j = 0; j != n_buckets; ++j) { \
+ khkey_t key; \
+ if (!__kh_used(h->used, j)) continue; \
+ key = h->keys[j]; \
+ __kh_set_unused(h->used, j); \
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+ khint_t i; \
+ i = __kh_h2b(__hash_fn(key), h->salt, new_bits); \
+ while (__kh_used(new_used, i)) i = (i + 1U) & new_mask; \
+ __kh_set_used(new_used, i); \
+ if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \
+ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+ __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \
+ } else { /* write the element and jump out of the loop */ \
+ h->keys[i] = key; \
+ break; \
+ } \
+ } \
+ } \
+ if (n_buckets > new_n_buckets) { /* shrink the hash table */ \
+ khkey_t *new_keys = Krealloc(h->km, khkey_t, h->keys, new_n_buckets); \
+ if (!new_keys) { Kfree(h->km, new_used); return -1; } \
+ h->keys = new_keys; \
+ } \
+ Kfree(h->km, h->used); /* free the working space */ \
+ h->used = new_used, h->bits = new_bits; \
+ return 0; \
+ }
+
+ #define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \
+ khint_t n_buckets, i, last, mask; \
+ n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+ *absent = -1; \
+ if (h->count >= kh_max_count(n_buckets)) { /* rehashing */ \
+ if (prefix##_resize(h, n_buckets + 1U) < 0) \
+ return n_buckets; \
+ n_buckets = (khint_t)1U<<h->bits; \
+ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+ mask = n_buckets - 1; \
+ i = last = __kh_h2b(hash, h->salt, h->bits); \
+ while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+ i = (i + 1U) & mask; \
+ if (i == last) break; \
+ } \
+ if (!__kh_used(h->used, i)) { /* not present at all */ \
+ h->keys[i] = *key; \
+ __kh_set_used(h->used, i); \
+ ++h->count; \
+ *absent = 1; \
+ } else *absent = 0; /* Don't touch h->keys[i] if present */ \
+ return i; \
+ } \
+ SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); }
+
+ #define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \
+ SCOPE int prefix##_del(HType *h, khint_t i) { \
+ khint_t j = i, k, mask, n_buckets; \
+ if (h->keys == 0) return 0; \
+ n_buckets = (khint_t)1U<<h->bits; \
+ mask = n_buckets - 1U; \
+ while (1) { \
+ j = (j + 1U) & mask; \
+ if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \
+ k = __kh_h2b(__hash_fn(h->keys[j]), h->salt, h->bits); \
+ if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \
+ h->keys[i] = h->keys[j], i = j; \
+ } \
+ __kh_set_unused(h->used, i); \
+ --h->count; \
+ return 1; \
+ }
+
+ #define KHASHL_DECLARE(HType, prefix, khkey_t) \
+ __KHASHL_TYPE(HType, khkey_t) \
+ __KHASHL_PROTOTYPES(HType, prefix, khkey_t)
+
+ #define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_TYPE(HType, khkey_t) \
+ __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+ __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn)
+
+ /***************************
+ * Ensemble of hash tables *
+ ***************************/
+
+ typedef struct {
+ khint_t sub, pos;
+ } kh_ensitr_t;
+
+ #define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct HType { \
+ void *km; \
+ khint64_t count:54, bits:8; \
+ HType##_sub *sub; \
+ } HType; \
+ SCOPE HType *prefix##_init3(void *km, int bits, khint_t seed) { \
+ HType *g; \
+ g = Kcalloc(km, HType, 1); \
+ if (!g) return 0; \
+ g->bits = bits, g->km = km; \
+ g->sub = Kcalloc(km, HType##_sub, 1U<<bits); \
+ if (seed != 0) { \
+ khint_t i, rng = seed; \
+ for (i = 0; i < 1U<<bits; ++i) \
+ g->sub[i].salt = __kh_splitmix32(&rng); \
+ } \
+ return g; \
+ } \
+ SCOPE HType *prefix##_init2(void *km, int bits) { return prefix##_init3(km, bits, 0); } \
+ SCOPE HType *prefix##_init(int bits) { return prefix##_init2(0, bits); } \
+ SCOPE void prefix##_destroy(HType *g) { \
+ int t; \
+ if (!g) return; \
+ for (t = 0; t < 1<<g->bits; ++t) { Kfree(g->km, (void*)g->sub[t].keys); Kfree(g->km, g->sub[t].used); } \
+ Kfree(g->km, g->sub); Kfree(g->km, g); \
+ } \
+ SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \
+ khint_t hash, low, ret; \
+ kh_ensitr_t r; \
+ HType##_sub *h; \
+ hash = __hash_fn(*key); \
+ low = hash & ((1U<<g->bits) - 1); \
+ h = &g->sub[low]; \
+ ret = prefix##_sub_getp_core(h, key, hash); \
+ if (ret == kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \
+ else r.sub = low, r.pos = ret; \
+ return r; \
+ } \
+ SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \
+ SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \
+ khint_t hash, low, ret; \
+ kh_ensitr_t r; \
+ HType##_sub *h; \
+ hash = __hash_fn(*key); \
+ low = hash & ((1U<<g->bits) - 1); \
+ h = &g->sub[low]; \
+ ret = prefix##_sub_putp_core(h, key, hash, absent); \
+ if (*absent) ++g->count; \
+ r.sub = low, r.pos = ret; \
+ return r; \
+ } \
+ SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \
+ SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \
+ HType##_sub *h = &g->sub[itr.sub]; \
+ int ret; \
+ ret = prefix##_sub_del(h, itr.pos); \
+ if (ret) --g->count; \
+ return ret; \
+ } \
+ SCOPE void prefix##_clear(HType *g) { \
+ int i; \
+ for (i = 0; i < 1U<<g->bits; ++i) prefix##_sub_clear(&g->sub[i]); \
+ g->count = 0; \
+ } \
+ SCOPE void prefix##_resize(HType *g, khint64_t new_n_buckets) { \
+ khint_t j; \
+ for (j = 0; j < 1U<<g->bits; ++j) \
+ prefix##_sub_resize(&g->sub[j], new_n_buckets >> g->bits); \
+ }
+
+ /*****************************
+ * More convenient interface *
+ *****************************/
+
+ /* common */
+
+ #define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; } kh_packed HType##_s_bucket_t; \
+ static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \
+ SCOPE HType *prefix##_init2(void *km) { return prefix##_s_init2(km); } \
+ SCOPE HType *prefix##_init3(void *km, khint_t seed) { return prefix##_s_init3(km, seed); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \
+ SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); }
+
+ #define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; } kh_packed HType##_m_bucket_t; \
+ static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+ SCOPE HType *prefix##_init2(void *km) { return prefix##_m_init2(km); } \
+ SCOPE HType *prefix##_init3(void *km, khint_t seed) { return prefix##_m_init3(km, seed); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+ SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); }
+
+ /* cached hashes to trade memory for performance when hashing and comparison are expensive */
+
+ #define __kh_cached_hash(x) ((x).hash)
+
+ #define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; khint_t hash; } kh_packed HType##_cs_bucket_t; \
+ static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_cs_clear(h); }
+
+ #define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; khint_t hash; } kh_packed HType##_cm_bucket_t; \
+ static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_cm_clear(h); }
+
+ /* ensemble for huge hash tables */
+
+ #define KHASHE_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; } kh_packed HType##_es_bucket_t; \
+ static kh_inline khint_t prefix##_es_hash(HType##_es_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_es_eq(HType##_es_bucket_t x, HType##_es_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHE_INIT(KH_LOCAL, HType, prefix##_es, HType##_es_bucket_t, prefix##_es_hash, prefix##_es_eq) \
+ SCOPE HType *prefix##_init(int bits) { return prefix##_es_init(bits); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_es_destroy(h); } \
+ SCOPE void prefix##_resize(HType *h, khint64_t new_n_buckets) { prefix##_es_resize(h, new_n_buckets); } \
+ SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_es_bucket_t t; t.key = key; return prefix##_es_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_es_del(h, k); } \
+ SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_es_bucket_t t; t.key = key; return prefix##_es_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_es_clear(h); }
+
+ #define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; } kh_packed HType##_em_bucket_t; \
+ static kh_inline khint_t prefix##_em_hash(HType##_em_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_em_eq(HType##_em_bucket_t x, HType##_em_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHE_INIT(KH_LOCAL, HType, prefix##_em, HType##_em_bucket_t, prefix##_em_hash, prefix##_em_eq) \
+ SCOPE HType *prefix##_init(int bits) { return prefix##_em_init(bits); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_em_destroy(h); } \
+ SCOPE void prefix##_resize(HType *h, khint64_t new_n_buckets) { prefix##_em_resize(h, new_n_buckets); } \
+ SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_em_bucket_t t; t.key = key; return prefix##_em_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_em_del(h, k); } \
+ SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_em_bucket_t t; t.key = key; return prefix##_em_putp(h, &t, absent); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_em_clear(h); }
+
+ /**************************
+ * Public macro functions *
+ **************************/
+
+ #define kh_bucket(h, x) ((h)->keys[x])
+ #define kh_size(h) ((h)->count)
+ #define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U)
+ #define kh_end(h) kh_capacity(h)
+
+ #define kh_key(h, x) ((h)->keys[x].key)
+ #define kh_val(h, x) ((h)->keys[x].val)
+ #define kh_exist(h, x) __kh_used((h)->used, (x))
+
+ #define kh_foreach(h, x) for ((x) = 0; (x) != kh_end(h); ++(x)) if (kh_exist((h), (x)))
+
+ #define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos)
+ #define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos)
+ #define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos)
+ #define kh_ens_is_end(x) ((x).pos == (khint_t)-1)
+ #define kh_ens_size(g) ((g)->count)
+
+ #define kh_ens_foreach(g, x) for ((x).sub = 0; (x).sub != 1<<(g)->bits; ++(x).sub) for ((x).pos = 0; (x).pos != kh_end(&(g)->sub[(x).sub]); ++(x).pos) if (kh_ens_exist((g), (x)))
+
+ /**************************************
+ * Common hash and equality functions *
+ **************************************/
+
+ #define kh_eq_generic(a, b) ((a) == (b))
+ #define kh_eq_str(a, b) (strcmp((a), (b)) == 0)
+ #define kh_hash_dummy(x) ((khint_t)(x))
+
+ static kh_inline khint_t kh_hash_uint32(khint_t x) { /* murmur finishing */
+ x ^= x >> 16;
+ x *= 0x85ebca6bU;
+ x ^= x >> 13;
+ x *= 0xc2b2ae35U;
+ x ^= x >> 16;
+ return x;
+ }
+
+ static kh_inline khint_t kh_hash_uint64(khint64_t x) { /* splitmix64; see https://nullprogram.com/blog/2018/07/31/ for inversion */
+ x ^= x >> 30;
+ x *= 0xbf58476d1ce4e5b9ULL;
+ x ^= x >> 27;
+ x *= 0x94d049bb133111ebULL;
+ x ^= x >> 31;
+ return (khint_t)x;
+ }
+
+ static kh_inline khint_t kh_hash_str(kh_cstr_t s) { /* FNV1a */
+ khint_t h = 2166136261U;
+ const unsigned char *t = (const unsigned char*)s;
+ for (; *t; ++t)
+ h ^= *t, h *= 16777619;
+ return h;
+ }
+
+ static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) {
+ khint_t h = 2166136261U;
+ int i;
+ for (i = 0; i < len; ++i)
+ h ^= s[i], h *= 16777619;
+ return h;
+ }
+
+ #endif /* __AC_KHASHL_H */
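
Editor's note: the khashl header above is consumed entirely through its instantiation macros. As orientation, here is a minimal, self-contained usage sketch; it is illustrative only, and the str2int names are not part of pf2:

#include <stdio.h>
#include "khashl.h"

/* Illustrative example, not part of pf2: instantiate a string -> int map
 * using the hash/equality helpers defined at the bottom of khashl.h. */
KHASHL_MAP_INIT(KH_LOCAL, str2int_t, str2int, kh_cstr_t, int, kh_hash_str, kh_eq_str)

int main(void) {
    int absent;
    str2int_t *h = str2int_init();
    khint_t k = str2int_put(h, "pf2", &absent);  /* insert or find the bucket for the key */
    if (absent) kh_val(h, k) = 0;                /* the value is uninitialized on first insert */
    kh_val(h, k)++;
    k = str2int_get(h, "pf2");                   /* kh_end(h) would signal "not found" */
    if (k != kh_end(h)) printf("%s -> %d\n", kh_key(h, k), kh_val(h, k));
    kh_foreach(h, k) printf("bucket %u is occupied\n", (unsigned)k);
    str2int_destroy(h);
    return 0;
}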
data/ext/pf2/serializer.c CHANGED
@@ -85,47 +85,120 @@ pf2_ser_prepare(struct pf2_ser *serializer, struct pf2_session *session) {
  serializer->dropped_sample_count =
  atomic_load_explicit(&session->dropped_sample_count, memory_order_relaxed);
 
- // Process samples
- for (size_t i = 0; i < session->samples_index; i++) {
- struct pf2_sample *sample = &session->samples[i];
- ensure_samples_capacity(serializer);
-
- struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
- ser_sample->ruby_thread_id = (uintptr_t)sample->context_pthread;
- ser_sample->elapsed_ns = sample->timestamp_ns - serializer->start_timestamp_ns;
-
- // Copy and process Ruby stack frames
- ser_sample->stack = malloc(sizeof(size_t) * sample->depth);
- ser_sample->stack_count = sample->depth;
- for (int j = 0; j < sample->depth; j++) {
- VALUE frame = sample->cmes[j];
- int32_t lineno = sample->linenos[j];
-
- struct pf2_ser_function func = extract_function_from_ruby_frame(frame);
- size_t function_index = function_index_for(serializer, &func);
- size_t location_index = location_index_for(serializer, function_index, lineno);
-
- ser_sample->stack[j] = location_index;
+ // ---------------------------------------------------------------------
+ // Build locations/functions from the session's interning tables
+ // ---------------------------------------------------------------------
+ size_t location_table_size = kh_size(session->location_table);
+ if (location_table_size > serializer->locations_capacity) {
+ serializer->locations_capacity = location_table_size;
+ serializer->locations = realloc(
+ serializer->locations,
+ serializer->locations_capacity * sizeof(struct pf2_ser_location)
+ );
+ }
+
+ khint_t k;
+ kh_foreach(session->location_table, k) {
+ size_t location_id = kh_val(session->location_table, k);
+ VALUE cme = kh_key(session->location_table, k).cme;
+ int lineno = kh_key(session->location_table, k).lineno;
+
+ struct pf2_ser_function func = extract_function_from_ruby_frame(cme);
+ size_t function_index = function_index_for(serializer, &func);
+
+ // location ids are assigned sequentially in intern_location, so we can
+ // place them directly by id.
+ serializer->locations[location_id].function_index = function_index;
+ serializer->locations[location_id].lineno = lineno;
+ serializer->locations[location_id].address = 0;
+ }
+ serializer->locations_count = location_table_size;
+
+ // ---------------------------------------------------------------------
+ // Precompute stack/native stack lookups by id for fast access
+ // ---------------------------------------------------------------------
+ size_t ruby_stack_count = kh_size(session->stack_table);
+ struct pf2_stack_key *ruby_stacks = NULL;
+ if (ruby_stack_count > 0) {
+ ruby_stacks = malloc(sizeof(struct pf2_stack_key) * ruby_stack_count);
+ kh_foreach(session->stack_table, k) {
+ size_t stack_id = kh_val(session->stack_table, k);
+ ruby_stacks[stack_id] = kh_key(session->stack_table, k);
  }
+ }
 
- // Copy and process native stack frames, if any
- if (sample->native_stack_depth > 0) {
- ser_sample->native_stack = malloc(sizeof(size_t) * sample->native_stack_depth);
- ser_sample->native_stack_count = sample->native_stack_depth;
+ size_t native_stack_count = kh_size(session->native_stack_table);
+ struct pf2_native_stack_key *native_stacks = NULL;
+ if (native_stack_count > 0) {
+ native_stacks = malloc(sizeof(struct pf2_native_stack_key) * native_stack_count);
+ kh_foreach(session->native_stack_table, k) {
+ size_t stack_id = kh_val(session->native_stack_table, k);
+ native_stacks[stack_id] = kh_key(session->native_stack_table, k);
+ }
+ }
 
- for (size_t j = 0; j < sample->native_stack_depth; j++) {
- struct pf2_ser_function func = extract_function_from_native_pc(sample->native_stack[j]);
- size_t function_index = function_index_for(serializer, &func);
- size_t location_index = location_index_for(serializer, function_index, 0);
+ // ---------------------------------------------------------------------
+ // Process aggregated sample_table entries into serializer samples
+ // ---------------------------------------------------------------------
+ size_t total_samples = 0;
+ kh_foreach(session->sample_table, k) {
+ total_samples += kh_val(session->sample_table, k).timestamps_count;
+ }
+ if (total_samples > serializer->samples_capacity) {
+ serializer->samples_capacity = total_samples;
+ serializer->samples = realloc(
+ serializer->samples,
+ serializer->samples_capacity * sizeof(struct pf2_ser_sample)
+ );
+ }
 
- ser_sample->native_stack[j] = location_index;
+ kh_foreach(session->sample_table, k) {
+ struct pf2_combined_stack_key ckey = kh_key(session->sample_table, k);
+ struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
+
+ const struct pf2_stack_key *ruby_stack = ruby_stacks ? &ruby_stacks[ckey.ruby_stack_id] : NULL;
+ const struct pf2_native_stack_key *native_stack = native_stacks ? &native_stacks[ckey.native_stack_id] : NULL;
+
+ for (size_t t = 0; t < stats->timestamps_count; t++) {
+ ensure_samples_capacity(serializer);
+ struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
+
+ ser_sample->ruby_thread_id = stats->thread_ids ? stats->thread_ids[t] : 0;
+ ser_sample->elapsed_ns = stats->timestamps[t] - serializer->start_timestamp_ns;
+
+ // Ruby stack
+ if (ruby_stack && ruby_stack->depth > 0) {
+ ser_sample->stack = malloc(sizeof(size_t) * ruby_stack->depth);
+ ser_sample->stack_count = ruby_stack->depth;
+ for (size_t j = 0; j < ruby_stack->depth; j++) {
+ // location ids map directly to indices in serializer->locations
+ ser_sample->stack[j] = ruby_stack->frames[j];
+ }
+ } else {
+ ser_sample->stack = NULL;
+ ser_sample->stack_count = 0;
  }
- } else {
- ser_sample->native_stack = NULL;
- ser_sample->native_stack_count = 0;
- }
 
+ // Native stack
+ if (native_stack && native_stack->depth > 0) {
+ ser_sample->native_stack = malloc(sizeof(size_t) * native_stack->depth);
+ ser_sample->native_stack_count = native_stack->depth;
+
+ for (size_t j = 0; j < native_stack->depth; j++) {
+ struct pf2_ser_function func = extract_function_from_native_pc(native_stack->frames[j]);
+ size_t function_index = function_index_for(serializer, &func);
+ size_t location_index = location_index_for(serializer, function_index, 0);
+ ser_sample->native_stack[j] = location_index;
+ }
+ } else {
+ ser_sample->native_stack = NULL;
+ ser_sample->native_stack_count = 0;
+ }
+ }
  }
+
+ free(ruby_stacks);
+ free(native_stacks);
  }
 
  VALUE
@@ -164,7 +237,7 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
  rb_hash_aset(
  sample_hash,
  ID2SYM(rb_intern("ruby_thread_id")),
- sample->ruby_thread_id ? ULL2NUM(sample->ruby_thread_id) : Qnil
+ ULL2NUM(sample->ruby_thread_id)
  );
  rb_hash_aset(sample_hash, ID2SYM(rb_intern("elapsed_ns")), ULL2NUM(sample->elapsed_ns));
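
Editor's note: the serializer change above expands aggregated samples from khashl tables that map a key to a sequentially assigned id, then flattens those tables into id-indexed arrays (ruby_stacks, native_stacks). The sketch below shows that intern-then-flatten pattern in isolation; intern_t, intern_id_for, and flatten_by_id are illustrative names under assumed semantics, not pf2 APIs:

#include <stdlib.h>
#include "khashl.h"

/* Illustrative sketch, not pf2 code: map a 64-bit key to a dense id (0, 1, 2, ...). */
KHASHL_MAP_INIT(KH_LOCAL, intern_t, intern, khint64_t, size_t, kh_hash_uint64, kh_eq_generic)

/* Return the id for `key`, assigning the next free id the first time it is seen. */
static size_t intern_id_for(intern_t *h, khint64_t key) {
    int absent;
    khint_t k = intern_put(h, key, &absent);
    if (absent) kh_val(h, k) = kh_size(h) - 1;  /* count was just incremented, so ids stay dense */
    return kh_val(h, k);
}

/* Flatten the table into an array indexed by id, analogous to how the
 * serializer builds its ruby_stacks / native_stacks lookups above. */
static khint64_t *flatten_by_id(const intern_t *h) {
    khint64_t *out = malloc(sizeof(khint64_t) * kh_size(h));
    khint_t k;
    kh_foreach(h, k) out[kh_val(h, k)] = kh_key(h, k);
    return out;
}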