hirlite 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +15 -0
  2. data/LICENSE +28 -0
  3. data/Rakefile +51 -0
  4. data/ext/hirlite_ext/extconf.rb +33 -0
  5. data/ext/hirlite_ext/hirlite_ext.c +14 -0
  6. data/ext/hirlite_ext/hirlite_ext.h +38 -0
  7. data/ext/hirlite_ext/rlite.c +351 -0
  8. data/lib/hirlite/rlite.rb +1 -0
  9. data/lib/hirlite/version.rb +3 -0
  10. data/lib/hirlite.rb +2 -0
  11. data/vendor/rlite/Makefile +6 -0
  12. data/vendor/rlite/deps/crc64.c +191 -0
  13. data/vendor/rlite/deps/crc64.h +3 -0
  14. data/vendor/rlite/deps/endianconv.h +73 -0
  15. data/vendor/rlite/deps/hyperloglog.c +1547 -0
  16. data/vendor/rlite/deps/hyperloglog.h +14 -0
  17. data/vendor/rlite/deps/lzf.h +100 -0
  18. data/vendor/rlite/deps/lzfP.h +159 -0
  19. data/vendor/rlite/deps/lzf_c.c +295 -0
  20. data/vendor/rlite/deps/lzf_d.c +150 -0
  21. data/vendor/rlite/deps/sha1.c +227 -0
  22. data/vendor/rlite/deps/sha1.h +19 -0
  23. data/vendor/rlite/deps/utilfromredis.c +397 -0
  24. data/vendor/rlite/deps/utilfromredis.h +11 -0
  25. data/vendor/rlite/src/Makefile +79 -0
  26. data/vendor/rlite/src/constants.h +15 -0
  27. data/vendor/rlite/src/dump.c +191 -0
  28. data/vendor/rlite/src/dump.h +3 -0
  29. data/vendor/rlite/src/hirlite.c +3985 -0
  30. data/vendor/rlite/src/hirlite.h +186 -0
  31. data/vendor/rlite/src/page_btree.c +1556 -0
  32. data/vendor/rlite/src/page_btree.h +133 -0
  33. data/vendor/rlite/src/page_key.c +283 -0
  34. data/vendor/rlite/src/page_key.h +25 -0
  35. data/vendor/rlite/src/page_list.c +718 -0
  36. data/vendor/rlite/src/page_list.h +70 -0
  37. data/vendor/rlite/src/page_long.c +61 -0
  38. data/vendor/rlite/src/page_long.h +14 -0
  39. data/vendor/rlite/src/page_multi_string.c +538 -0
  40. data/vendor/rlite/src/page_multi_string.h +18 -0
  41. data/vendor/rlite/src/page_skiplist.c +689 -0
  42. data/vendor/rlite/src/page_skiplist.h +70 -0
  43. data/vendor/rlite/src/page_string.c +55 -0
  44. data/vendor/rlite/src/page_string.h +12 -0
  45. data/vendor/rlite/src/pqsort.c +185 -0
  46. data/vendor/rlite/src/pqsort.h +40 -0
  47. data/vendor/rlite/src/restore.c +401 -0
  48. data/vendor/rlite/src/restore.h +3 -0
  49. data/vendor/rlite/src/rlite.c +1309 -0
  50. data/vendor/rlite/src/rlite.h +159 -0
  51. data/vendor/rlite/src/sort.c +530 -0
  52. data/vendor/rlite/src/sort.h +18 -0
  53. data/vendor/rlite/src/status.h +19 -0
  54. data/vendor/rlite/src/type_hash.c +607 -0
  55. data/vendor/rlite/src/type_hash.h +29 -0
  56. data/vendor/rlite/src/type_list.c +477 -0
  57. data/vendor/rlite/src/type_list.h +23 -0
  58. data/vendor/rlite/src/type_set.c +796 -0
  59. data/vendor/rlite/src/type_set.h +34 -0
  60. data/vendor/rlite/src/type_string.c +613 -0
  61. data/vendor/rlite/src/type_string.h +34 -0
  62. data/vendor/rlite/src/type_zset.c +1147 -0
  63. data/vendor/rlite/src/type_zset.h +50 -0
  64. data/vendor/rlite/src/util.c +334 -0
  65. data/vendor/rlite/src/util.h +71 -0
  66. metadata +151 -0
@@ -0,0 +1,1547 @@
1
+ /* hyperloglog.c - Redis HyperLogLog probabilistic cardinality approximation.
2
+ * This file implements the algorithm and the exported Redis commands.
3
+ *
4
+ * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * * Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ * * Redistributions in binary form must reproduce the above copyright
13
+ * notice, this list of conditions and the following disclaimer in the
14
+ * documentation and/or other materials provided with the distribution.
15
+ * * Neither the name of Redis nor the names of its contributors may be used
16
+ * to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
+ * POSSIBILITY OF SUCH DAMAGE.
30
+ */
31
+
32
+ #include "hyperloglog.h"
33
+ #include <stdlib.h>
34
+ #include <stdio.h>
35
+ #include <string.h>
36
+ #include <stdint.h>
37
+ #include <math.h>
38
+
39
+ /* The Redis HyperLogLog implementation is based on the following ideas:
40
+ *
41
+ * * The use of a 64 bit hash function as proposed in [1], in order not to be
42
+ * limited to cardinalities up to 10^9, at the cost of just 1 additional
43
+ * bit per register.
44
+ * * The use of 16384 6-bit registers for a great level of accuracy, using
45
+ * a total of 12k per key.
46
+ * * The use of the Redis string data type. No new type is introduced.
47
+ * * No attempt is made to compress the data structure as in [1]. Also the
48
+ * algorithm used is the original HyperLogLog Algorithm as in [2], with
49
+ * the only difference that a 64 bit hash function is used, so no correction
50
+ * is performed for values near 2^32 as in [1].
51
+ *
52
+ * [1] Heule, Nunkesser, Hall: HyperLogLog in Practice: Algorithmic
53
+ * Engineering of a State of The Art Cardinality Estimation Algorithm.
54
+ *
55
+ * [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The
56
+ * analysis of a near-optimal cardinality estimation algorithm.
57
+ *
58
+ * Redis uses two representations:
59
+ *
60
+ * 1) A "dense" representation where every entry is represented by
61
+ * a 6-bit integer.
62
+ * 2) A "sparse" representation using run length compression suitable
63
+ * for representing HyperLogLogs with many registers set to 0 in
64
+ * a memory efficient way.
65
+ *
66
+ *
67
+ * HLL header
68
+ * ===
69
+ *
70
+ * Both the dense and sparse representation have a 16 byte header as follows:
71
+ *
72
+ * +------+---+-----+----------+
73
+ * | HYLL | E | N/U | Cardin. |
74
+ * +------+---+-----+----------+
75
+ *
76
+ * The first 4 bytes are a magic string set to the bytes "HYLL".
77
+ * "E" is one byte encoding, currently set to HLL_DENSE or
78
+ * HLL_SPARSE. N/U are three not used bytes.
79
+ *
80
+ * The "Cardin." field is a 64 bit integer stored in little endian format
81
+ * with the latest cardinality computed that can be reused if the data
82
+ * structure was not modified since the last computation (this is useful
83
+ * because there are high probabilities that HLLADD operations don't
84
+ * modify the actual data structure and hence the approximated cardinality).
85
+ *
86
+ * When the most significant bit in the most significant byte of the cached
87
+ * cardinality is set, it means that the data structure was modified and
88
+ * we can't reuse the cached value that must be recomputed.
89
+ *
90
+ * Dense representation
91
+ * ===
92
+ *
93
+ * The dense representation used by Redis is the following:
94
+ *
95
+ * +--------+--------+--------+------// //--+
96
+ * |11000000|22221111|33333322|55444444 .... |
97
+ * +--------+--------+--------+------// //--+
98
+ *
99
+ * The 6 bits counters are encoded one after the other starting from the
100
+ * LSB to the MSB, and using the next bytes as needed.
101
+ *
102
+ * Sparse representation
103
+ * ===
104
+ *
105
+ * The sparse representation encodes registers using a run length
106
+ * encoding composed of three opcodes, two using one byte, and one using
107
+ * two bytes. The opcodes are called ZERO, XZERO and VAL.
108
+ *
109
+ * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented
110
+ * by the six bits 'xxxxxx', plus 1, means that there are N registers set
111
+ * to 0. This opcode can represent from 1 to 64 contiguous registers set
112
+ * to the value of 0.
113
+ *
114
+ * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit
115
+ * integer represented by the bits 'xxxxxx' as most significant bits and
116
+ * 'yyyyyyyy' as least significant bits, plus 1, means that there are N
117
+ * registers set to 0. This opcode can represent from 1 to 16384 contiguous
118
+ * registers set to the value of 0.
119
+ *
120
+ * VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer
121
+ * representing the value of a register, and a 2-bit integer representing
122
+ * the number of contiguous registers set to that value 'vvvvv'.
123
+ * To obtain the value and run length, the integers vvvvv and xx must be
124
+ * incremented by one. This opcode can represent values from 1 to 32,
125
+ * repeated from 1 to 4 times.
126
+ *
127
+ * The sparse representation can't represent registers with a value greater
128
+ * than 32, however it is very unlikely that we find such a register in an
129
+ * HLL with a cardinality where the sparse representation is still more
130
+ * memory efficient than the dense representation. When this happens the
131
+ * HLL is converted to the dense representation.
132
+ *
133
+ * The sparse representation is purely positional. For example a sparse
134
+ * representation of an empty HLL is just: XZERO:16384.
135
+ *
136
+ * An HLL having only 3 non-zero registers at position 1000, 1020, 1021
137
+ * respectively set to 2, 3, 3, is represented by the following three
138
+ * opcodes:
139
+ *
140
+ * XZERO:1000 (Registers 0-999 are set to 0)
141
+ * VAL:2,1 (1 register set to value 2, that is register 1000)
142
+ * ZERO:19 (Registers 1001-1019 set to 0)
143
+ * VAL:3,2 (2 registers set to value 3, that is registers 1020,1021)
144
+ * XZERO:15362 (Registers 1022-16383 set to 0)
145
+ *
146
+ * In the example the sparse representation used just 7 bytes instead
147
+ * of 12k in order to represent the HLL registers. In general for low
148
+ * cardinality there is a big win in terms of space efficiency, traded
149
+ * with CPU time since the sparse representation is slower to access:
150
+ *
151
+ * The following table shows average cardinality vs bytes used, 100
152
+ * samples per cardinality (when the set was not representable because
153
+ * of registers with too big value, the dense representation size was used
154
+ * as a sample).
155
+ *
156
+ * 100 267
157
+ * 200 485
158
+ * 300 678
159
+ * 400 859
160
+ * 500 1033
161
+ * 600 1205
162
+ * 700 1375
163
+ * 800 1544
164
+ * 900 1713
165
+ * 1000 1882
166
+ * 2000 3480
167
+ * 3000 4879
168
+ * 4000 6089
169
+ * 5000 7138
170
+ * 6000 8042
171
+ * 7000 8823
172
+ * 8000 9500
173
+ * 9000 10088
174
+ * 10000 10591
175
+ *
176
+ * The dense representation uses 12288 bytes, so there is a big win up to
177
+ * a cardinality of ~2000-3000. For bigger cardinalities the constant times
178
+ * involved in updating the sparse representation is not justified by the
179
+ * memory savings. The exact maximum length of the sparse representation
180
+ * when this implementation switches to the dense representation is
181
+ * configured via the define server.hll_sparse_max_bytes.
182
+ */
183
+
184
+ struct hllhdr { /* 16-byte header shared by the dense and sparse encodings. */
185
+ char magic[4]; /* Magic bytes "HYLL" identifying an HLL string. */
186
+ uint8_t encoding; /* HLL_DENSE or HLL_SPARSE. */
187
+ uint8_t notused[3]; /* Reserved for future use, must be zero. */
188
+ uint8_t card[8]; /* Cached cardinality, little endian; MSB of card[7] set means the cache is stale. */
189
+ uint8_t registers[]; /* Flexible array member: the encoded register bytes that follow the header. */
190
+ };
191
+
192
+ /* The most significant bit of card[7] flags the cached cardinality as stale. */
193
+ #define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[7] |= (1<<7) /* Set the stale bit. */
194
+ #define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0) /* True when the stale bit is clear. */
195
+
196
+ #define HLL_P 14 /* The greater P is, the smaller the error. */
197
+ #define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
198
+ #define HLL_P_MASK (HLL_REGISTERS-1) /* Mask extracting the register index from a hash. */
199
+ #define HLL_BITS 6 /* Bits per register: enough to count up to 63 leading zeroes. */
200
+ #define HLL_REGISTER_MAX ((1<<HLL_BITS)-1) /* Largest value a register can hold (63 with 6 bits). */
201
+ #define HLL_HDR_SIZE sizeof(struct hllhdr) /* Bytes occupied by the header before the registers. */
202
+ #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8)) /* Header plus packed registers, rounded up to whole bytes. */
203
+ #define HLL_DENSE 0 /* Dense encoding. */
204
+ #define HLL_SPARSE 1 /* Sparse encoding. */
205
+ #define HLL_RAW 255 /* Only used internally, never exposed. */
206
+ #define HLL_MAX_ENCODING 1 /* Equal to HLL_SPARSE, the highest encoding value defined above. */
207
+
208
+ size_t rl_hll_sparse_max_bytes = 3000; /* NOTE(review): presumably the sparse-size limit the comments call server.hll_sparse_max_bytes - confirm against its users. */
209
+
210
+ /* =========================== Low level bit macros ========================= */
211
+
212
+ /* Macros to access the dense representation.
213
+ *
214
+ * We need to get and set 6 bit counters in an array of 8 bit bytes.
215
+ * We use macros to make sure the code is inlined since speed is critical
216
+ * especially in order to compute the approximated cardinality in
217
+ * HLLCOUNT where we need to access all the registers at once.
218
+ * For the same reason we also want to avoid conditionals in this code path.
219
+ *
220
+ * +--------+--------+--------+------//
221
+ * |11000000|22221111|33333322|55444444
222
+ * +--------+--------+--------+------//
223
+ *
224
+ * Note: in the above representation the most significant bit (MSB)
225
+ * of every byte is on the left. We start using bits from the LSB to MSB,
226
+ * and so forth passing to the next byte.
227
+ *
228
+ * Example, we want to access to counter at pos = 1 ("111111" in the
229
+ * illustration above).
230
+ *
231
+ * The index of the first byte b0 containing our data is:
232
+ *
233
+ * b0 = 6 * pos / 8 = 0
234
+ *
235
+ * +--------+
236
+ * |11000000| <- Our byte at b0
237
+ * +--------+
238
+ *
239
+ * The position of the first bit (counting from the LSB = 0) in the byte
240
+ * is given by:
241
+ *
242
+ * fb = 6 * pos % 8 -> 6
243
+ *
244
+ * Right shift b0 of 'fb' bits.
245
+ *
246
+ * +--------+
247
+ * |11000000| <- Initial value of b0
248
+ * |00000011| <- After right shift of 6 pos.
249
+ * +--------+
250
+ *
251
+ * Left shift b1 of bits 8-fb bits (2 bits)
252
+ *
253
+ * +--------+
254
+ * |22221111| <- Initial value of b1
255
+ * |22111100| <- After left shift of 2 bits.
256
+ * +--------+
257
+ *
258
+ * OR the two bits, and finally AND with 111111 (63 in decimal) to
259
+ * clean the higher order bits we are not interested in:
260
+ *
261
+ * +--------+
262
+ * |00000011| <- b0 right shifted
263
+ * |22111100| <- b1 left shifted
264
+ * |22111111| <- b0 OR b1
265
+ * | 111111| <- (b0 OR b1) AND 63, our value.
266
+ * +--------+
267
+ *
268
+ * We can try with a different example, like pos = 0. In this case
269
+ * the 6-bit counter is actually contained in a single byte.
270
+ *
271
+ * b0 = 6 * pos / 8 = 0
272
+ *
273
+ * +--------+
274
+ * |11000000| <- Our byte at b0
275
+ * +--------+
276
+ *
277
+ * fb = 6 * pos % 8 = 0
278
+ *
279
+ * So we right shift of 0 bits (no shift in practice) and
280
+ * left shift the next byte of 8 bits, even if we don't use it,
281
+ * but this has the effect of clearing the bits so the result
282
+ * will not be affected after the OR.
283
+ *
284
+ * -------------------------------------------------------------------------
285
+ *
286
+ * Setting the register is a bit more complex, let's assume that 'val'
287
+ * is the value we want to set, already in the right range.
288
+ *
289
+ * We need two steps, in one we need to clear the bits, and in the other
290
+ * we need to bitwise-OR the new bits.
291
+ *
292
+ * Let's try with 'pos' = 1, so our first byte at 'b' is 0,
293
+ *
294
+ * "fb" is 6 in this case.
295
+ *
296
+ * +--------+
297
+ * |11000000| <- Our byte at b0
298
+ * +--------+
299
+ *
300
+ * To create a AND-mask to clear the bits about this position, we just
301
+ * initialize the mask with the value 63, left shift it of "fb" bits,
302
+ * and finally invert the result.
303
+ *
304
+ * +--------+
305
+ * |00111111| <- "mask" starts at 63
306
+ * |11000000| <- "mask" after left shift of "fb" bits.
307
+ * |00111111| <- "mask" after invert.
308
+ * +--------+
309
+ *
310
+ * Now we can bitwise-AND the byte at "b" with the mask, and bitwise-OR
311
+ * it with "val" left-shifted of "fb" bits to set the new bits.
312
+ *
313
+ * Now let's focus on the next byte b1:
314
+ *
315
+ * +--------+
316
+ * |22221111| <- Initial value of b1
317
+ * +--------+
318
+ *
319
+ * To build the AND mask we start again with the 63 value, right shift
320
+ * it by 8-fb bits, and invert it.
321
+ *
322
+ * +--------+
323
+ * |00111111| <- "mask" set at 2^6-1
324
+ * |00001111| <- "mask" after the right shift by 8-fb = 2 bits
325
+ * |11110000| <- "mask" after bitwise not.
326
+ * +--------+
327
+ *
328
+ * Now we can mask it with b+1 to clear the old bits, and bitwise-OR
329
+ * with "val" right-shifted by 8-fb bits to set the new value.
330
+ */
331
+
332
+ /* Note: if we access the last counter, we will also access the b+1 byte
333
+ * that is out of the array, but sds strings always have an implicit null
334
+ * term, so the byte exists, and we can skip the conditional (or the need
335
+ * to allocate 1 byte more explicitly). */
336
+
337
/* Store the value of the register at position 'regnum' into variable 'target'.
 * 'p' is an array of unsigned bytes holding the packed HLL_BITS-wide
 * registers.
 *
 * The byte following the one that holds the first bit of the register is
 * always read, even when the register fits in a single byte, so the array
 * must have one readable byte past the last register byte.
 *
 * 'p' and 'regnum' may be arbitrary expressions: each one is parenthesized
 * and evaluated exactly once. */
#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
    const uint8_t *_p = (const uint8_t*) (p); \
    unsigned long _regnum = (regnum); \
    unsigned long _byte = _regnum*HLL_BITS/8;   /* First byte of the register. */ \
    unsigned long _fb = _regnum*HLL_BITS&7;     /* First bit inside that byte. */ \
    unsigned long _fb8 = 8 - _fb; \
    unsigned long _b0 = _p[_byte]; \
    unsigned long _b1 = _p[_byte+1]; \
    (target) = ((_b0 >> _fb) | (_b1 << _fb8)) & HLL_REGISTER_MAX; \
} while(0)

/* Set the value of the register at position 'regnum' to 'val'.
 * 'p' is an array of unsigned bytes holding the packed HLL_BITS-wide
 * registers; 'val' must already be in the range 0..HLL_REGISTER_MAX.
 *
 * The byte following the one that holds the first bit of the register is
 * always read and written back (unchanged when the register does not spill
 * into it), so the array must have one writable byte past the last register
 * byte.
 *
 * All arguments may be arbitrary expressions: each one is parenthesized and
 * evaluated exactly once. */
#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
    uint8_t *_p = (uint8_t*) (p); \
    unsigned long _regnum = (regnum); \
    unsigned long _byte = _regnum*HLL_BITS/8;   /* First byte of the register. */ \
    unsigned long _fb = _regnum*HLL_BITS&7;     /* First bit inside that byte. */ \
    unsigned long _fb8 = 8 - _fb; \
    unsigned long _v = (val); \
    _p[_byte] &= ~(HLL_REGISTER_MAX << _fb);    /* Clear the low part. */ \
    _p[_byte] |= _v << _fb;                     /* Store the low part. */ \
    _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); /* Clear the spilled part. */ \
    _p[_byte+1] |= _v >> _fb8;                  /* Store the spilled part. */ \
} while(0)
362
+
363
+ /* Macros to access the sparse representation.
364
+ * In every macro 'p' is expected to be a uint8_t pointer to the first byte
+ * of an opcode. Lengths and values are stored minus one, so the decoding
+ * macros add 1 and the encoding macros subtract 1. */
365
+ #define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */
366
+ #define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */
367
+ #define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
368
+ #define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT) /* 01xxxxxx */
369
+ #define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT) /* 1vvvvvxx; non-zero (0x80), not 1, when true */
370
+ #define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1) /* Run length 1..64 */
371
+ #define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1) /* Run length 1..16384; reads p[0] and p[1] */
372
+ #define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1) /* Register value 1..32 */
373
+ #define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1) /* Run length 1..4 */
374
+ #define HLL_SPARSE_VAL_MAX_VALUE 32 /* Largest value a VAL opcode can hold. */
375
+ #define HLL_SPARSE_VAL_MAX_LEN 4 /* Longest run a VAL opcode can hold. */
376
+ #define HLL_SPARSE_ZERO_MAX_LEN 64 /* Longest run a ZERO opcode can hold. */
377
+ #define HLL_SPARSE_XZERO_MAX_LEN 16384 /* Longest run an XZERO opcode can hold. */
378
+ #define HLL_SPARSE_VAL_SET(p,val,len) do { /* Write a one-byte VAL opcode. */ \
379
+ *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
380
+ } while(0)
381
+ #define HLL_SPARSE_ZERO_SET(p,len) do { /* Write a one-byte ZERO opcode. */ \
382
+ *(p) = (len)-1; \
383
+ } while(0)
384
+ #define HLL_SPARSE_XZERO_SET(p,len) do { /* Write a two-byte XZERO opcode at p[0], p[1]. */ \
385
+ int _l = (len)-1; \
386
+ *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
387
+ *((p)+1) = (_l&0xff); \
388
+ } while(0)
389
+
390
+ /* ========================= HyperLogLog algorithm ========================= */
391
+
392
/* MurmurHash2, 64 bit version (MurmurHash64A), used to hash HLL elements.
 *
 * The input is consumed as little endian 64 bit words assembled one byte at
 * a time. This makes the result identical on big and little endian hosts
 * without relying on BYTE_ORDER being defined, and it removes the unaligned,
 * type-punned 64 bit load, so 'key' needs no particular alignment.
 *
 * key:  bytes to hash (not read when len is 0).
 * len:  number of bytes at 'key'.
 * seed: hash seed.
 *
 * Returns the 64 bit hash of the input. */
static uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;
    uint64_t h = seed ^ (len * m);
    const uint8_t *data = (const uint8_t *)key;
    const uint8_t *end = data + (len-(len&7));

    /* Mix the body, eight bytes at a time. */
    while(data != end) {
        uint64_t k;

        k  = (uint64_t) data[0];
        k |= (uint64_t) data[1] << 8;
        k |= (uint64_t) data[2] << 16;
        k |= (uint64_t) data[3] << 24;
        k |= (uint64_t) data[4] << 32;
        k |= (uint64_t) data[5] << 40;
        k |= (uint64_t) data[6] << 48;
        k |= (uint64_t) data[7] << 56;

        k *= m;
        k ^= k >> r;
        k *= m;
        h ^= k;
        h *= m;
        data += 8;
    }

    /* Mix the 0..7 trailing bytes; every case deliberately falls through
     * to the next one so that all remaining bytes are folded in. */
    switch(len & 7) {
    case 7: h ^= (uint64_t)data[6] << 48; /* fallthrough */
    case 6: h ^= (uint64_t)data[5] << 40; /* fallthrough */
    case 5: h ^= (uint64_t)data[4] << 32; /* fallthrough */
    case 4: h ^= (uint64_t)data[3] << 24; /* fallthrough */
    case 3: h ^= (uint64_t)data[2] << 16; /* fallthrough */
    case 2: h ^= (uint64_t)data[1] << 8;  /* fallthrough */
    case 1: h ^= (uint64_t)data[0];
            h *= m;
            break;
    default:
            break; /* No trailing bytes. */
    }

    /* Final avalanche. */
    h ^= h >> r;
    h *= m;
    h ^= h >> r;
    return h;
}
442
+
443
+ /* Given a string element to add to the HyperLogLog, returns the length
444
+ * of the pattern 000..1 of the element hash. As a side effect 'regp' is
445
+ * set to the register index this element hashes to. */
446
+ static int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
447
+ uint64_t hash, bit, index;
448
+ int count;
449
+
450
+ /* Count the number of zeroes starting from bit HLL_REGISTERS
451
+ * (that is a power of two corresponding to the first bit we don't use
452
+ * as index). The max run can be 64-P+1 bits.
453
+ *
454
+ * Note that the final "1" ending the sequence of zeroes must be
455
+ * included in the count, so if we find "001" the count is 3, and
456
+ * the smallest count possible is no zeroes at all, just a 1 bit
457
+ * at the first position, that is a count of 1.
458
+ *
459
+ * This may sound like inefficient, but actually in the average case
460
+ * there are high probabilities to find a 1 after a few iterations. */
461
+ hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
462
+ index = hash & HLL_P_MASK; /* Register index. */
463
+ hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */
464
+ bit = HLL_REGISTERS; /* First bit not used to address the register. */
465
+ count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
466
+ while((hash & bit) == 0) {
467
+ count++;
468
+ bit <<= 1;
469
+ }
470
+ *regp = (int) index;
471
+ return count;
472
+ }
473
+
474
+ /* ================== Dense representation implementation ================== */
475
+
476
/* "Add" an element to a dense HyperLogLog.
 *
 * Nothing is really stored: the element is hashed to a register and to a
 * zero-run length, and the register is raised to that length when the
 * length is greater than the value it currently holds.
 *
 * 'registers' points to the packed dense registers. The dense register
 * macros also touch the byte that follows the one being addressed, so the
 * buffer must extend one byte past the last register byte.
 *
 * The function cannot fail. It returns 1 when the register was updated
 * (so the approximated cardinality changed) and 0 when it was left as is. */
static int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
    long slot;
    uint8_t current;
    uint8_t candidate = hllPatLen(ele,elesize,&slot);

    HLL_DENSE_GET_REGISTER(current,registers,slot);
    if (candidate <= current) return 0; /* Not a longer run: no change. */

    HLL_DENSE_SET_REGISTER(registers,slot,candidate);
    return 1;
}
501
+
502
+ /* Compute SUM(2^-reg) in the dense representation.
503
+ * PE is an array with a pre-computer table of values 2^-reg indexed by reg.
504
+ * As a side effect the integer pointed by 'ezp' is set to the number
505
+ * of zero registers. */
506
+ static double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
507
+ double E = 0;
508
+ int j, ez = 0;
509
+
510
+ /* Redis default is to use 16384 registers 6 bits each. The code works
511
+ * with other values by modifying the defines, but for our target value
512
+ * we take a faster path with unrolled loops. */
513
+ if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
514
+ uint8_t *r = registers;
515
+ unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9,
516
+ r10, r11, r12, r13, r14, r15;
517
+ for (j = 0; j < 1024; j++) {
518
+ /* Handle 16 registers per iteration. */
519
+ r0 = r[0] & 63; if (r0 == 0) ez++;
520
+ r1 = (r[0] >> 6 | r[1] << 2) & 63; if (r1 == 0) ez++;
521
+ r2 = (r[1] >> 4 | r[2] << 4) & 63; if (r2 == 0) ez++;
522
+ r3 = (r[2] >> 2) & 63; if (r3 == 0) ez++;
523
+ r4 = r[3] & 63; if (r4 == 0) ez++;
524
+ r5 = (r[3] >> 6 | r[4] << 2) & 63; if (r5 == 0) ez++;
525
+ r6 = (r[4] >> 4 | r[5] << 4) & 63; if (r6 == 0) ez++;
526
+ r7 = (r[5] >> 2) & 63; if (r7 == 0) ez++;
527
+ r8 = r[6] & 63; if (r8 == 0) ez++;
528
+ r9 = (r[6] >> 6 | r[7] << 2) & 63; if (r9 == 0) ez++;
529
+ r10 = (r[7] >> 4 | r[8] << 4) & 63; if (r10 == 0) ez++;
530
+ r11 = (r[8] >> 2) & 63; if (r11 == 0) ez++;
531
+ r12 = r[9] & 63; if (r12 == 0) ez++;
532
+ r13 = (r[9] >> 6 | r[10] << 2) & 63; if (r13 == 0) ez++;
533
+ r14 = (r[10] >> 4 | r[11] << 4) & 63; if (r14 == 0) ez++;
534
+ r15 = (r[11] >> 2) & 63; if (r15 == 0) ez++;
535
+
536
+ /* Additional parens will allow the compiler to optimize the
537
+ * code more with a loss of precision that is not very relevant
538
+ * here (floating point math is not commutative!). */
539
+ E += (PE[r0] + PE[r1]) + (PE[r2] + PE[r3]) + (PE[r4] + PE[r5]) +
540
+ (PE[r6] + PE[r7]) + (PE[r8] + PE[r9]) + (PE[r10] + PE[r11]) +
541
+ (PE[r12] + PE[r13]) + (PE[r14] + PE[r15]);
542
+ r += 12;
543
+ }
544
+ } else {
545
+ for (j = 0; j < HLL_REGISTERS; j++) {
546
+ unsigned long reg;
547
+
548
+ HLL_DENSE_GET_REGISTER(reg,registers,j);
549
+ if (reg == 0) {
550
+ ez++;
551
+ /* Increment E at the end of the loop. */
552
+ } else {
553
+ E += PE[reg]; /* Precomputed 2^(-reg[j]). */
554
+ }
555
+ }
556
+ E += ez; /* Add 2^0 'ez' times. */
557
+ }
558
+ *ezp = ez;
559
+ return E;
560
+ }
561
+
562
+ /* ================== Sparse representation implementation ================= */
563
+
564
+ /* Convert the HLL with sparse representation given as input in its dense
565
+ * representation. Both representations are represented by SDS strings, and
566
+ * the input representation is freed as a side effect.
567
+ *
568
+ * The function returns 0 if the sparse representation was valid,
569
+ * otherwise 1 is returned if the representation was corrupted. */
570
+ static int hllSparseToDense(unsigned char *sparse, long sparselen, unsigned char **newstr, long *newstrlen) {
571
+ unsigned char *dense;
572
+ struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
573
+ int idx = 0, runlen, regval;
574
+ uint8_t *p = (uint8_t*)sparse, *end = p+sparselen;
575
+
576
+ /* If the representation is already the right one return ASAP. */
577
+ hdr = (struct hllhdr*) sparse;
578
+ if (hdr->encoding == HLL_DENSE) return 0;
579
+
580
+ /* Create a string of the right size filled with zero bytes.
581
+ * Note that the cached cardinality is set to 0 as a side effect
582
+ * that is exactly the cardinality of an empty HLL. */
583
+ dense = calloc(1, sizeof(unsigned char) * (HLL_DENSE_SIZE + 1));
584
+ hdr = (struct hllhdr*) dense;
585
+ *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */
586
+ hdr->encoding = HLL_DENSE;
587
+
588
+ /* Now read the sparse representation and set non-zero registers
589
+ * accordingly. */
590
+ p += HLL_HDR_SIZE;
591
+ while(p < end) {
592
+ if (HLL_SPARSE_IS_ZERO(p)) {
593
+ runlen = HLL_SPARSE_ZERO_LEN(p);
594
+ idx += runlen;
595
+ p++;
596
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
597
+ runlen = HLL_SPARSE_XZERO_LEN(p);
598
+ idx += runlen;
599
+ p += 2;
600
+ } else {
601
+ runlen = HLL_SPARSE_VAL_LEN(p);
602
+ regval = HLL_SPARSE_VAL_VALUE(p);
603
+ while(runlen--) {
604
+ HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
605
+ idx++;
606
+ }
607
+ p++;
608
+ }
609
+ }
610
+
611
+ /* If the sparse representation was valid, we expect to find idx
612
+ * set to HLL_REGISTERS. */
613
+ if (idx != HLL_REGISTERS) {
614
+ free(dense);
615
+ return 1;
616
+ }
617
+
618
+ /* Free the old representation and set the new one. */
619
+ free(sparse);
620
+ *newstr = dense;
621
+ *newstrlen = HLL_DENSE_SIZE;
622
+ return 0;
623
+ }
624
+
625
+ /* "Add" the element in the sparse hyperloglog data structure.
626
+ * Actually nothing is added, but the max 0 pattern counter of the subset
627
+ * the element belongs to is incremented if needed.
628
+ *
629
+ * The object 'o' is the String object holding the HLL. The function requires
630
+ * a reference to the object in order to be able to enlarge the string if
631
+ * needed.
632
+ *
633
+ * On success, the function returns 1 if the cardinality changed, or 0
634
+ * if the register for this element was not updated.
635
+ * On error (if the representation is invalid) -1 is returned.
636
+ *
637
+ * As a side effect the function may promote the HLL representation from
638
+ * sparse to dense: this happens when a register requires to be set to a value
639
+ * not representable with the sparse representation, or when the resulting
640
+ * size would be greater than server.hll_sparse_max_bytes. */
641
+ static int hllSparseAdd(unsigned char *str, long strlen, unsigned char *ele, size_t elesize, unsigned char **_str, long *_strlen) {
642
+ void *tmp;
643
+ struct hllhdr *hdr;
644
+ uint8_t oldcount, count, *sparse, *end, *p, *prev, *next;
645
+ long index, first, span;
646
+ long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0;
647
+
648
+ /* Update the register if this element produced a longer run of zeroes. */
649
+ count = hllPatLen(ele,elesize,&index);
650
+
651
+ /* If the count is too big to be representable by the sparse representation
652
+ * switch to dense representation. */
653
+ if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;
654
+
655
+ /* When updating a sparse representation, sometimes we may need to
656
+ * enlarge the buffer for up to 3 bytes in the worst case (XZERO split
657
+ * into XZERO-VAL-XZERO). Make sure there is enough space right now
658
+ * so that the pointers we take during the execution of the function
659
+ * will be valid all the time. */
660
+ tmp = realloc(str, sizeof(unsigned char) * (strlen + 3));
661
+ if (!tmp) {
662
+ return 1;
663
+ }
664
+ *_str = str = tmp;
665
+
666
+ /* Step 1: we need to locate the opcode we need to modify to check
667
+ * if a value update is actually needed. */
668
+ sparse = p = ((uint8_t*)str) + HLL_HDR_SIZE;
669
+ end = p + strlen - HLL_HDR_SIZE;
670
+
671
+ first = 0;
672
+ prev = NULL; /* Points to previos opcode at the end of the loop. */
673
+ next = NULL; /* Points to the next opcode at the end of the loop. */
674
+ span = 0;
675
+ while(p < end) {
676
+ long oplen;
677
+
678
+ /* Set span to the number of registers covered by this opcode.
679
+ *
680
+ * This is the most performance critical loop of the sparse
681
+ * representation. Sorting the conditionals from the most to the
682
+ * least frequent opcode in many-bytes sparse HLLs is faster. */
683
+ oplen = 1;
684
+ if (HLL_SPARSE_IS_ZERO(p)) {
685
+ span = HLL_SPARSE_ZERO_LEN(p);
686
+ } else if (HLL_SPARSE_IS_VAL(p)) {
687
+ span = HLL_SPARSE_VAL_LEN(p);
688
+ } else { /* XZERO. */
689
+ span = HLL_SPARSE_XZERO_LEN(p);
690
+ oplen = 2;
691
+ }
692
+ /* Break if this opcode covers the register as 'index'. */
693
+ if (index <= first+span-1) break;
694
+ prev = p;
695
+ p += oplen;
696
+ first += span;
697
+ }
698
+ if (span == 0) return -1; /* Invalid format. */
699
+
700
+ next = HLL_SPARSE_IS_XZERO(p) ? p+2 : p+1;
701
+ if (next >= end) next = NULL;
702
+
703
+ /* Cache current opcode type to avoid using the macro again and
704
+ * again for something that will not change.
705
+ * Also cache the run-length of the opcode. */
706
+ if (HLL_SPARSE_IS_ZERO(p)) {
707
+ is_zero = 1;
708
+ runlen = HLL_SPARSE_ZERO_LEN(p);
709
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
710
+ is_xzero = 1;
711
+ runlen = HLL_SPARSE_XZERO_LEN(p);
712
+ } else {
713
+ is_val = 1;
714
+ runlen = HLL_SPARSE_VAL_LEN(p);
715
+ }
716
+
717
+ /* Step 2: After the loop:
718
+ *
719
+ * 'first' stores to the index of the first register covered
720
+ * by the current opcode, which is pointed by 'p'.
721
+ *
722
+ * 'next' ad 'prev' store respectively the next and previous opcode,
723
+ * or NULL if the opcode at 'p' is respectively the last or first.
724
+ *
725
+ * 'span' is set to the number of registers covered by the current
726
+ * opcode.
727
+ *
728
+ * There are different cases in order to update the data structure
729
+ * in place without generating it from scratch:
730
+ *
731
+ * A) If it is a VAL opcode already set to a value >= our 'count'
732
+ * no update is needed, regardless of the VAL run-length field.
733
+ * In this case PFADD returns 0 since no changes are performed.
734
+ *
735
+ * B) If it is a VAL opcode with len = 1 (representing only our
736
+ * register) and the value is less than 'count', we just update it
737
+ * since this is a trivial case. */
738
+ if (is_val) {
739
+ oldcount = HLL_SPARSE_VAL_VALUE(p);
740
+ /* Case A. */
741
+ if (oldcount >= count) return 0;
742
+
743
+ /* Case B. */
744
+ if (runlen == 1) {
745
+ HLL_SPARSE_VAL_SET(p,count,1);
746
+ goto updated;
747
+ }
748
+ }
749
+
750
+ /* C) Another trivial to handle case is a ZERO opcode with a len of 1.
751
+ * We can just replace it with a VAL opcode with our value and len of 1. */
752
+ if (is_zero && runlen == 1) {
753
+ HLL_SPARSE_VAL_SET(p,count,1);
754
+ goto updated;
755
+ }
756
+
757
+ /* D) General case.
758
+ *
759
+ * The other cases are more complex: our register requires to be updated
760
+ * and is either currently represented by a VAL opcode with len > 1,
761
+ * by a ZERO opcode with len > 1, or by an XZERO opcode.
762
+ *
763
+ * In those cases the original opcode must be split into muliple
764
+ * opcodes. The worst case is an XZERO split in the middle resuling into
765
+ * XZERO - VAL - XZERO, so the resulting sequence max length is
766
+ * 5 bytes.
767
+ *
768
+ * We perform the split writing the new sequence into the 'new' buffer
769
+ * with 'newlen' as length. Later the new sequence is inserted in place
770
+ * of the old one, possibly moving what is on the right a few bytes
771
+ * if the new sequence is longer than the older one. */
772
+ uint8_t seq[5], *n = seq;
773
+ int last = first+span-1; /* Last register covered by the sequence. */
774
+ int len;
775
+
776
+ if (is_zero || is_xzero) {
777
+ /* Handle splitting of ZERO / XZERO. */
778
+ if (index != first) {
779
+ len = index-first;
780
+ if (len > HLL_SPARSE_ZERO_MAX_LEN) {
781
+ HLL_SPARSE_XZERO_SET(n,len);
782
+ n += 2;
783
+ } else {
784
+ HLL_SPARSE_ZERO_SET(n,len);
785
+ n++;
786
+ }
787
+ }
788
+ HLL_SPARSE_VAL_SET(n,count,1);
789
+ n++;
790
+ if (index != last) {
791
+ len = last-index;
792
+ if (len > HLL_SPARSE_ZERO_MAX_LEN) {
793
+ HLL_SPARSE_XZERO_SET(n,len);
794
+ n += 2;
795
+ } else {
796
+ HLL_SPARSE_ZERO_SET(n,len);
797
+ n++;
798
+ }
799
+ }
800
+ } else {
801
+ /* Handle splitting of VAL. */
802
+ int curval = HLL_SPARSE_VAL_VALUE(p);
803
+
804
+ if (index != first) {
805
+ len = index-first;
806
+ HLL_SPARSE_VAL_SET(n,curval,len);
807
+ n++;
808
+ }
809
+ HLL_SPARSE_VAL_SET(n,count,1);
810
+ n++;
811
+ if (index != last) {
812
+ len = last-index;
813
+ HLL_SPARSE_VAL_SET(n,curval,len);
814
+ n++;
815
+ }
816
+ }
817
+
818
+ /* Step 3: substitute the new sequence with the old one.
819
+ *
820
+ * Note that we already allocated space on the sds string
821
+ * calling sdsMakeRoomFor(). */
822
+ int seqlen = n-seq;
823
+ int oldlen = is_xzero ? 2 : 1;
824
+ int deltalen = seqlen-oldlen;
825
+
826
+ if (deltalen > 0 &&
827
+ (size_t)(strlen+deltalen) > rl_hll_sparse_max_bytes) goto promote;
828
+ if (deltalen && next) memmove(next+deltalen,next,end-next);
829
+ strlen += deltalen;
830
+ *_strlen = strlen;
831
+ memcpy(p,seq,seqlen);
832
+ end += deltalen;
833
+
834
+ updated:
835
+ /* Step 4: Merge adjacent values if possible.
836
+ *
837
+ * The representation was updated, however the resulting representation
838
+ * may not be optimal: adjacent VAL opcodes can sometimes be merged into
839
+ * a single one. */
840
+ p = prev ? prev : sparse;
841
+ int scanlen = 5; /* Scan up to 5 upcodes starting from prev. */
842
+ while (p < end && scanlen--) {
843
+ if (HLL_SPARSE_IS_XZERO(p)) {
844
+ p += 2;
845
+ continue;
846
+ } else if (HLL_SPARSE_IS_ZERO(p)) {
847
+ p++;
848
+ continue;
849
+ }
850
+ /* We need two adjacent VAL opcodes to try a merge, having
851
+ * the same value, and a len that fits the VAL opcode max len. */
852
+ if (p+1 < end && HLL_SPARSE_IS_VAL(p+1)) {
853
+ int v1 = HLL_SPARSE_VAL_VALUE(p);
854
+ int v2 = HLL_SPARSE_VAL_VALUE(p+1);
855
+ if (v1 == v2) {
856
+ int len = HLL_SPARSE_VAL_LEN(p)+HLL_SPARSE_VAL_LEN(p+1);
857
+ if (len <= HLL_SPARSE_VAL_MAX_LEN) {
858
+ HLL_SPARSE_VAL_SET(p+1,v1,len);
859
+ memmove(p,p+1,end-p);
860
+ strlen--;
861
+ end--;
862
+ *_strlen = strlen;
863
+ /* After a merge we reiterate without incrementing 'p'
864
+ * in order to try to merge the just merged value with
865
+ * a value on its right. */
866
+ continue;
867
+ }
868
+ }
869
+ }
870
+ p++;
871
+ }
872
+
873
+ /* Invalidate the cached cardinality. */
874
+ hdr = (struct hllhdr *)str;
875
+ HLL_INVALIDATE_CACHE(hdr);
876
+ return 1;
877
+
878
+ promote: /* Promote to dense representation. */
879
+ if (hllSparseToDense(str, strlen, &str, &strlen) == 1) return -1; /* Corrupted HLL. */
880
+ hdr = (struct hllhdr *)str;
881
+
882
+ /* We need to call hllDenseAdd() to perform the operation after the
883
+ * conversion. However the result must be 1, since if we need to
884
+ * convert from sparse to dense a register requires to be updated.
885
+ *
886
+ * Note that this in turn means that PFADD will make sure the command
887
+ * is propagated to slaves / AOF, so if there is a sparse -> dense
888
+ * convertion, it will be performed in all the slaves as well. */
889
+ int dense_retval = hllDenseAdd(hdr->registers, ele, elesize);
890
+ *_str = str;
891
+ *_strlen = strlen;
892
+ return dense_retval;
893
+ }
894
+
895
+ /* Compute SUM(2^-reg) in the sparse representation.
896
+ * PE is an array with a pre-computer table of values 2^-reg indexed by reg.
897
+ * As a side effect the integer pointed by 'ezp' is set to the number
898
+ * of zero registers. */
899
+ static double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *invalid) {
900
+ double E = 0;
901
+ int ez = 0, idx = 0, runlen, regval;
902
+ uint8_t *end = sparse+sparselen, *p = sparse;
903
+
904
+ while(p < end) {
905
+ if (HLL_SPARSE_IS_ZERO(p)) {
906
+ runlen = HLL_SPARSE_ZERO_LEN(p);
907
+ idx += runlen;
908
+ ez += runlen;
909
+ /* Increment E at the end of the loop. */
910
+ p++;
911
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
912
+ runlen = HLL_SPARSE_XZERO_LEN(p);
913
+ idx += runlen;
914
+ ez += runlen;
915
+ /* Increment E at the end of the loop. */
916
+ p += 2;
917
+ } else {
918
+ runlen = HLL_SPARSE_VAL_LEN(p);
919
+ regval = HLL_SPARSE_VAL_VALUE(p);
920
+ idx += runlen;
921
+ E += PE[regval]*runlen;
922
+ p++;
923
+ }
924
+ }
925
+ if (idx != HLL_REGISTERS && invalid) *invalid = 1;
926
+ E += ez; /* Add 2^0 'ez' times. */
927
+ *ezp = ez;
928
+ return E;
929
+ }
930
+
931
+ /* ========================= HyperLogLog Count ==============================
932
+ * This is the core of the algorithm where the approximated count is computed.
933
+ * The function uses the lower level hllDenseSum() and hllSparseSum() functions
934
+ * as helpers to compute the SUM(2^-reg) part of the computation, which is
935
+ * representation-specific, while all the rest is common. */
936
+
937
+ /* Implements the SUM operation for uint8_t data type which is only used
938
+ * internally as speedup for PFCOUNT with multiple keys. */
939
+ static double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
940
+ double E = 0;
941
+ int j, ez = 0;
942
+ uint64_t *word = (uint64_t*) registers;
943
+ uint8_t *bytes;
944
+
945
+ for (j = 0; j < HLL_REGISTERS/8; j++) {
946
+ if (*word == 0) {
947
+ ez += 8;
948
+ } else {
949
+ bytes = (uint8_t*) word;
950
+ if (bytes[0]) E += PE[bytes[0]]; else ez++;
951
+ if (bytes[1]) E += PE[bytes[1]]; else ez++;
952
+ if (bytes[2]) E += PE[bytes[2]]; else ez++;
953
+ if (bytes[3]) E += PE[bytes[3]]; else ez++;
954
+ if (bytes[4]) E += PE[bytes[4]]; else ez++;
955
+ if (bytes[5]) E += PE[bytes[5]]; else ez++;
956
+ if (bytes[6]) E += PE[bytes[6]]; else ez++;
957
+ if (bytes[7]) E += PE[bytes[7]]; else ez++;
958
+ }
959
+ word++;
960
+ }
961
+ E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every
962
+ zero register in the HLL. */
963
+ *ezp = ez;
964
+ return E;
965
+ }
966
+
967
+ /* Return the approximated cardinality of the set based on the harmonic
968
+ * mean of the registers values. 'hdr' points to the start of the SDS
969
+ * representing the String object holding the HLL representation.
970
+ *
971
+ * If the sparse representation of the HLL object is not valid, the integer
972
+ * pointed by 'invalid' is set to non-zero, otherwise it is left untouched.
973
+ *
974
+ * hllCount() supports a special internal-only encoding of HLL_RAW, that
975
+ * is, hdr->registers will point to an uint8_t array of HLL_REGISTERS element.
976
+ * This is useful in order to speedup PFCOUNT when called against multiple
977
+ * keys (no need to work with 6-bit integers encoding). */
978
+ static uint64_t hllCount(struct hllhdr *hdr, long strlen, int *invalid) {
979
+ double m = HLL_REGISTERS;
980
+ double E, alpha = 0.7213/(1+1.079/m);
981
+ int j, ez; /* Number of registers equal to 0. */
982
+
983
+ /* We precompute 2^(-reg[j]) in a small table in order to
984
+ * speedup the computation of SUM(2^-register[0..i]). */
985
+ static int initialized = 0;
986
+ static double PE[64];
987
+ if (!initialized) {
988
+ PE[0] = 1; /* 2^(-reg[j]) is 1 when m is 0. */
989
+ for (j = 1; j < 64; j++) {
990
+ /* 2^(-reg[j]) is the same as 1/2^reg[j]. */
991
+ PE[j] = 1.0/(1ULL << j);
992
+ }
993
+ initialized = 1;
994
+ }
995
+
996
+ /* Compute SUM(2^-register[0..i]). */
997
+ if (hdr->encoding == HLL_DENSE) {
998
+ E = hllDenseSum(hdr->registers,PE,&ez);
999
+ } else if (hdr->encoding == HLL_SPARSE) {
1000
+ E = hllSparseSum(hdr->registers, strlen-HLL_HDR_SIZE,PE,&ez,invalid);
1001
+ } else if (hdr->encoding == HLL_RAW) {
1002
+ E = hllRawSum(hdr->registers,PE,&ez);
1003
+ } else {
1004
+ return 1;
1005
+ }
1006
+
1007
+ /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */
1008
+ E = (1/E)*alpha*m*m;
1009
+
1010
+ /* Use the LINEARCOUNTING algorithm for small cardinalities.
1011
+ * For larger values but up to 72000 HyperLogLog raw approximation is
1012
+ * used since linear counting error starts to increase. However HyperLogLog
1013
+ * shows a strong bias in the range 2.5*16384 - 72000, so we try to
1014
+ * compensate for it. */
1015
+ if (E < m*2.5 && ez != 0) {
1016
+ E = m*log(m/ez); /* LINEARCOUNTING() */
1017
+ } else if (m == 16384 && E < 72000) {
1018
+ /* We did polynomial regression of the bias for this range, this
1019
+ * way we can compute the bias for a given cardinality and correct
1020
+ * according to it. Only apply the correction for P=14 that's what
1021
+ * we use and the value the correction was verified with. */
1022
+ double bias = 5.9119*1.0e-18*(E*E*E*E)
1023
+ -1.4253*1.0e-12*(E*E*E)+
1024
+ 1.2940*1.0e-7*(E*E)
1025
+ -5.2921*1.0e-3*E+
1026
+ 83.3216;
1027
+ E -= E*(bias/100);
1028
+ }
1029
+ /* We don't apply the correction for E > 1/30 of 2^32 since we use
1030
+ * a 64 bit function and 6 bit counters. To apply the correction for
1031
+ * 1/30 of 2^64 is not needed since it would require a huge set
1032
+ * to approach such a value. */
1033
+ return (uint64_t) E;
1034
+ }
1035
+
1036
+ /* Call hllDenseAdd() or hllSparseAdd() according to the HLL encoding. */
1037
+ static int hllAdd(unsigned char *str, long strlen, unsigned char *ele, size_t elesize, unsigned char **_str, long *_strlen) {
1038
+ *_str = str;
1039
+ *_strlen = strlen;
1040
+ struct hllhdr *hdr = (struct hllhdr *)str;
1041
+ switch(hdr->encoding) {
1042
+ case HLL_DENSE: return hllDenseAdd(hdr->registers, ele, elesize);
1043
+ case HLL_SPARSE: return hllSparseAdd(str, strlen, ele,elesize, _str, _strlen);
1044
+ default: return -1; /* Invalid representation. */
1045
+ }
1046
+ }
1047
+
1048
+ /* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'
1049
+ * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.
1050
+ *
1051
+ * The hll object must be already validated via isHLLObject()
1052
+ * or in some other way.
1053
+ *
1054
+ * If the HyperLogLog is sparse and is found to be invalid, 1
1055
+ * is returned, otherwise the function always succeeds. */
1056
+ static int hllMerge(uint8_t *max, unsigned char *str, long strlen) {
1057
+ struct hllhdr *hdr = (struct hllhdr *)str;
1058
+ int i;
1059
+
1060
+ if (hdr->encoding == HLL_DENSE) {
1061
+ uint8_t val;
1062
+
1063
+ for (i = 0; i < HLL_REGISTERS; i++) {
1064
+ HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
1065
+ if (val > max[i]) max[i] = val;
1066
+ }
1067
+ } else {
1068
+ uint8_t *p = (uint8_t *)hdr, *end = p + strlen;
1069
+ long runlen, regval;
1070
+
1071
+ p += HLL_HDR_SIZE;
1072
+ i = 0;
1073
+ while(p < end) {
1074
+ if (HLL_SPARSE_IS_ZERO(p)) {
1075
+ runlen = HLL_SPARSE_ZERO_LEN(p);
1076
+ i += runlen;
1077
+ p++;
1078
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
1079
+ runlen = HLL_SPARSE_XZERO_LEN(p);
1080
+ i += runlen;
1081
+ p += 2;
1082
+ } else {
1083
+ runlen = HLL_SPARSE_VAL_LEN(p);
1084
+ regval = HLL_SPARSE_VAL_VALUE(p);
1085
+ while(runlen--) {
1086
+ if (regval > max[i]) max[i] = regval;
1087
+ i++;
1088
+ }
1089
+ p++;
1090
+ }
1091
+ }
1092
+ if (i != HLL_REGISTERS) return 1;
1093
+ }
1094
+ return 0;
1095
+ }
1096
+
1097
+ /* ========================== HyperLogLog commands ========================== */
1098
+
1099
+ /* Create an HLL object. We always create the HLL using sparse encoding.
1100
+ * This will be upgraded to the dense representation as needed. */
1101
+ static int createHLLObject(unsigned char **_str, long *_strlen) {
1102
+ struct hllhdr *hdr;
1103
+ unsigned char *s;
1104
+ uint8_t *p;
1105
+ int sparselen = HLL_HDR_SIZE +
1106
+ (((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) /
1107
+ HLL_SPARSE_XZERO_MAX_LEN)*2);
1108
+ int aux;
1109
+
1110
+ /* Populate the sparse representation with as many XZERO opcodes as
1111
+ * needed to represent all the registers. */
1112
+ aux = HLL_REGISTERS;
1113
+ s = calloc(1, sizeof(unsigned char) * (sparselen + 1));
1114
+ p = (uint8_t*)s + HLL_HDR_SIZE;
1115
+ while(aux) {
1116
+ int xzero = HLL_SPARSE_XZERO_MAX_LEN;
1117
+ if (xzero > aux) xzero = aux;
1118
+ HLL_SPARSE_XZERO_SET(p,xzero);
1119
+ p += 2;
1120
+ aux -= xzero;
1121
+ }
1122
+ if (!((p-(uint8_t*)s) == sparselen)) return 1;
1123
+
1124
+ /* Create the actual object. */
1125
+ hdr = (struct hllhdr *)s;
1126
+ memcpy(hdr->magic,"HYLL",4);
1127
+ hdr->encoding = HLL_SPARSE;
1128
+ *_str = s;
1129
+ *_strlen = sparselen;
1130
+ return 0;
1131
+ }
1132
+
1133
+ /* Check if the object is a String with a valid HLL representation.
1134
+ * Return 0 if this is true, otherwise return 1. */
1135
+ static int isHLLObject(unsigned char *str, long strlen) {
1136
+ struct hllhdr *hdr;
1137
+
1138
+ if ((size_t)strlen < sizeof(*hdr)) goto invalid;
1139
+ hdr = (struct hllhdr *)str;
1140
+
1141
+ /* Magic should be "HYLL". */
1142
+ if (hdr->magic[0] != 'H' || hdr->magic[1] != 'Y' ||
1143
+ hdr->magic[2] != 'L' || hdr->magic[3] != 'L') goto invalid;
1144
+
1145
+ if (hdr->encoding > HLL_MAX_ENCODING) goto invalid;
1146
+
1147
+ /* Dense representation string length should match exactly. */
1148
+ if (hdr->encoding == HLL_DENSE &&
1149
+ strlen != HLL_DENSE_SIZE) goto invalid;
1150
+
1151
+ /* All tests passed. */
1152
+ return 0;
1153
+
1154
+ invalid:
1155
+ return -1;
1156
+ }
1157
+
1158
/* PFADD var ele ele ele ... ele => 0 or 1
 *
 * 'str' / 'strlen' hold the current value of the key, or str == NULL if
 * the key does not exist, in which case a new sparse HLL is created.
 * 'argv' / 'argvlen' hold the 'argc' elements to add.
 *
 * Returns 1 if the HLL was created or at least one register changed, 0 if
 * nothing changed and -1 on error (not a valid HLL, creation failure, or a
 * failed add).
 *
 * '*_str' / '*_strlen' receive the current buffer and length. They are also
 * written when an add fails: adding to a sparse HLL may relocate the buffer
 * (and a missing key gets a freshly created one), so without this the
 * caller would be left with a stale pointer or would lose the only
 * reference to the new object. They are left untouched when the function
 * fails before any add is attempted. */
int rl_str_pfadd(unsigned char *str, long strlen, int argc, unsigned char **argv, long *argvlen, unsigned char **_str, long *_strlen) {
    struct hllhdr *hdr;
    int updated = 0, j;

    if (str == NULL) {
        /* Create the key with a value of the exact length needed to hold
         * our HLL data structure. */
        if (createHLLObject(&str, &strlen)) {
            return -1;
        }
        updated++;
    } else {
        if (isHLLObject(str, strlen) != 0) return -1;
    }
    /* Perform the low level ADD operation for every element. */
    for (j = 0; j < argc; j++) {
        int retval = hllAdd(str, strlen, argv[j], argvlen[j], &str, &strlen);
        switch(retval) {
        case 1:
            updated++;
            break;
        case -1:
            /* Hand the current buffer back before bailing out.
             * NOTE(review): this assumes the buffer is still valid after a
             * failed add; hllSparseToDense() is not visible here - confirm
             * it does not release its input on failure. */
            *_str = str;
            *_strlen = strlen;
            return -1;
        default:
            break;
        }
    }
    hdr = (struct hllhdr *)str;
    if (updated) {
        HLL_INVALIDATE_CACHE(hdr);
    }
    *_str = str;
    *_strlen = strlen;
    return updated ? 1 : 0;
}
1193
+
1194
+ /* PFCOUNT var -> approximated cardinality of set. */
1195
+ int rl_str_pfcount(int argc, unsigned char **argv, long *argvlen, long *_card, unsigned char **updatevalue, long *updatevaluelen) {
1196
+ struct hllhdr *hdr;
1197
+ uint64_t card;
1198
+
1199
+ /* Case 1: multi-key keys, cardinality of the union.
1200
+ *
1201
+ * When multiple keys are specified, PFCOUNT actually computes
1202
+ * the cardinality of the merge of the N HLLs specified. */
1203
+ if (argc > 1) {
1204
+ uint8_t max[HLL_HDR_SIZE+HLL_REGISTERS], *registers;
1205
+ int j;
1206
+
1207
+ /* Compute an HLL with M[i] = MAX(M[i]_j). */
1208
+ memset(max,0,sizeof(max));
1209
+ hdr = (struct hllhdr*) max;
1210
+ hdr->encoding = HLL_RAW; /* Special internal-only encoding. */
1211
+ registers = max + HLL_HDR_SIZE;
1212
+ for (j = 0; j < argc; j++) {
1213
+ /* Check type and size. */
1214
+ if (argv[j] == NULL) continue; /* Assume empty HLL for non existing var. */
1215
+ if (isHLLObject(argv[j], argvlen[j]) != 0) return -1;
1216
+
1217
+ /* Merge with this HLL with our 'max' HHL by setting max[i]
1218
+ * to MAX(max[i],hll[i]). */
1219
+ if (hllMerge(registers, argv[j], argvlen[j]) == 1) {
1220
+ return -1;
1221
+ }
1222
+ }
1223
+
1224
+ /* Compute cardinality of the resulting set. */
1225
+ // TODO: probably not 0
1226
+ *_card = hllCount(hdr, 0, NULL);
1227
+ return 0;
1228
+ }
1229
+
1230
+ /* Case 2: cardinality of the single HLL.
1231
+ *
1232
+ * The user specified a single key. Either return the cached value
1233
+ * or compute one and update the cache. */
1234
+ if (argv[0] == NULL) {
1235
+ /* No key? Cardinality is zero since no element was added, otherwise
1236
+ * we would have a key as HLLADD creates it as a side effect. */
1237
+ *_card = 0;
1238
+ } else {
1239
+ if (isHLLObject(argv[0], argvlen[0]) != 0) return -1;
1240
+
1241
+ /* Check if the cached cardinality is valid. */
1242
+ hdr = (struct hllhdr*)argv[0];
1243
+ if (HLL_VALID_CACHE(hdr)) {
1244
+ /* Just return the cached value. */
1245
+ card = (uint64_t)hdr->card[0];
1246
+ card |= (uint64_t)hdr->card[1] << 8;
1247
+ card |= (uint64_t)hdr->card[2] << 16;
1248
+ card |= (uint64_t)hdr->card[3] << 24;
1249
+ card |= (uint64_t)hdr->card[4] << 32;
1250
+ card |= (uint64_t)hdr->card[5] << 40;
1251
+ card |= (uint64_t)hdr->card[6] << 48;
1252
+ card |= (uint64_t)hdr->card[7] << 56;
1253
+ } else {
1254
+ int invalid = 0;
1255
+ /* Recompute it and update the cached value. */
1256
+ card = hllCount(hdr, argvlen[0], &invalid);
1257
+ if (invalid) {
1258
+ return -1;
1259
+ }
1260
+ hdr->card[0] = card & 0xff;
1261
+ hdr->card[1] = (card >> 8) & 0xff;
1262
+ hdr->card[2] = (card >> 16) & 0xff;
1263
+ hdr->card[3] = (card >> 24) & 0xff;
1264
+ hdr->card[4] = (card >> 32) & 0xff;
1265
+ hdr->card[5] = (card >> 40) & 0xff;
1266
+ hdr->card[6] = (card >> 48) & 0xff;
1267
+ hdr->card[7] = (card >> 56) & 0xff;
1268
+ /* This is not considered a read-only command even if the
1269
+ * data structure is not modified, since the cached value
1270
+ * may be modified and given that the HLL is a Redis string
1271
+ * we need to propagate the change. */
1272
+ if (updatevalue) {
1273
+ *updatevalue = (unsigned char *)hdr;
1274
+ }
1275
+ if (updatevalue) {
1276
+ *updatevaluelen = argvlen[0];
1277
+ }
1278
+ }
1279
+ *_card = card;
1280
+ }
1281
+ return 0;
1282
+ }
1283
+
1284
+ int rl_str_pfmerge(int argc, unsigned char **argv, long *argvlen, unsigned char **_str, long *_strlen) {
1285
+ uint8_t max[HLL_REGISTERS];
1286
+ struct hllhdr *hdr;
1287
+ int j;
1288
+
1289
+ /* Compute an HLL with M[i] = MAX(M[i]_j).
1290
+ * We we the maximum into the max array of registers. We'll write
1291
+ * it to the target variable later. */
1292
+ memset(max,0,sizeof(max));
1293
+ for (j = 0; j < argc; j++) {
1294
+ /* Check type and size. */
1295
+ if (argv[j] == NULL) continue; /* Assume empty HLL for non existing var. */
1296
+ if (isHLLObject(argv[j], argvlen[j]) != 0) return -1;
1297
+
1298
+ /* Merge with this HLL with our 'max' HHL by setting max[i]
1299
+ * to MAX(max[i],hll[i]). */
1300
+ if (hllMerge(max, argv[j], argvlen[j]) == 1) {
1301
+ return -1;
1302
+ }
1303
+ }
1304
+
1305
+ createHLLObject(_str, _strlen);
1306
+
1307
+ /* Only support dense objects as destination. */
1308
+ if (hllSparseToDense(*_str, *_strlen, _str, _strlen) != 0) {
1309
+ return -1;
1310
+ }
1311
+
1312
+ /* Write the resulting HLL to the destination HLL registers and
1313
+ * invalidate the cached value. */
1314
+ hdr = (struct hllhdr *)*_str;
1315
+ for (j = 0; j < HLL_REGISTERS; j++) {
1316
+ HLL_DENSE_SET_REGISTER(hdr->registers,j,max[j]);
1317
+ }
1318
+ HLL_INVALIDATE_CACHE(hdr);
1319
+ return 0;
1320
+ }
1321
+
1322
+ /* ========================== Testing / Debugging ========================== */
1323
+
1324
+ /* PFSELFTEST
1325
+ * This command performs a self-test of the HLL registers implementation.
1326
+ * Something that is not easy to test from within the outside. */
1327
+ #define HLL_TEST_CYCLES 1000
1328
+ int rl_str_pfselftest() {
1329
+ int retval;
1330
+ long olen;
1331
+ unsigned int j, i;
1332
+ unsigned char *bitcounters = calloc(1, sizeof(unsigned char) * (HLL_DENSE_SIZE + 1));
1333
+ struct hllhdr *hdr = (struct hllhdr*) bitcounters, *hdr2;
1334
+ unsigned char *o = NULL;
1335
+ uint8_t bytecounters[HLL_REGISTERS];
1336
+
1337
+ /* Test 1: access registers.
1338
+ * The test is conceived to test that the different counters of our data
1339
+ * structure are accessible and that setting their values both result in
1340
+ * the correct value to be retained and not affect adjacent values. */
1341
+ for (j = 0; j < HLL_TEST_CYCLES; j++) {
1342
+ /* Set the HLL counters and an array of unsigned byes of the
1343
+ * same size to the same set of random values. */
1344
+ for (i = 0; i < HLL_REGISTERS; i++) {
1345
+ unsigned int r = rand() & HLL_REGISTER_MAX;
1346
+
1347
+ bytecounters[i] = r;
1348
+ HLL_DENSE_SET_REGISTER(hdr->registers,i,r);
1349
+ }
1350
+ /* Check that we are able to retrieve the same values. */
1351
+ for (i = 0; i < HLL_REGISTERS; i++) {
1352
+ unsigned int val;
1353
+
1354
+ HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
1355
+ if (val != bytecounters[i]) {
1356
+ retval = -1;
1357
+ goto cleanup;
1358
+ }
1359
+ }
1360
+ }
1361
+
1362
+ /* Test 2: approximation error.
1363
+ * The test adds unique elements and check that the estimated value
1364
+ * is always reasonable bounds.
1365
+ *
1366
+ * We check that the error is smaller than a few times than the expected
1367
+ * standard error, to make it very unlikely for the test to fail because
1368
+ * of a "bad" run.
1369
+ *
1370
+ * The test is performed with both dense and sparse HLLs at the same
1371
+ * time also verifying that the computed cardinality is the same. */
1372
+ memset(hdr->registers,0,HLL_DENSE_SIZE-HLL_HDR_SIZE);
1373
+ createHLLObject(&o, &olen);
1374
+ double relerr = 1.04/sqrt(HLL_REGISTERS);
1375
+ int64_t checkpoint = 1;
1376
+ uint64_t seed = (uint64_t)rand() | (uint64_t)rand() << 32;
1377
+ uint64_t ele;
1378
+ for (j = 1; j <= 10000000; j++) {
1379
+ ele = j ^ seed;
1380
+ hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele));
1381
+ hllAdd(o, olen, (unsigned char*)&ele, sizeof(ele), &o, &olen);
1382
+
1383
+ /* Make sure that for small cardinalities we use sparse
1384
+ * encoding. */
1385
+ if (j == checkpoint && j < rl_hll_sparse_max_bytes/2) {
1386
+ hdr2 = (struct hllhdr *)o;
1387
+ if (hdr2->encoding != HLL_SPARSE) {
1388
+ retval = -2;
1389
+ goto cleanup;
1390
+ }
1391
+ }
1392
+
1393
+ /* Check that dense and sparse representations agree. */
1394
+ if (j == checkpoint && hllCount(hdr, HLL_DENSE_SIZE, NULL) != hllCount((struct hllhdr *)o, olen, NULL)) {
1395
+ retval = -3;
1396
+ goto cleanup;
1397
+ }
1398
+
1399
+ /* Check error. */
1400
+ if (j == checkpoint) {
1401
+ int64_t abserr = checkpoint - (int64_t)hllCount(hdr, HLL_DENSE_SIZE, NULL);
1402
+ uint64_t maxerr = ceil(relerr*6*checkpoint);
1403
+
1404
+ /* Adjust the max error we expect for cardinality 10
1405
+ * since from time to time it is statistically likely to get
1406
+ * much higher error due to collision, resulting into a false
1407
+ * positive. */
1408
+ if (j == 10) maxerr = 1;
1409
+
1410
+ if (abserr < 0) abserr = -abserr;
1411
+ if (abserr > (int64_t)maxerr) {
1412
+ retval = -4;
1413
+ goto cleanup;
1414
+ }
1415
+ checkpoint *= 10;
1416
+ }
1417
+ }
1418
+
1419
+ retval = 0;
1420
+
1421
+ cleanup:
1422
+ free(bitcounters);
1423
+ free(o);
1424
+ return retval;
1425
+ }
1426
+
1427
+ int rl_str_pfdebug_getreg(unsigned char *str, long strlen, int *size, long **elements, unsigned char **_str, long *_strlen) {
1428
+ struct hllhdr *hdr;
1429
+ int j;
1430
+
1431
+ if (isHLLObject(str, strlen) != 0) return -1;
1432
+ hdr = (struct hllhdr *)str;
1433
+
1434
+ if (hdr->encoding == HLL_SPARSE) {
1435
+ if (hllSparseToDense(str, strlen, &str, &strlen) == 1) {
1436
+ return -2;
1437
+ }
1438
+ }
1439
+ hdr = (struct hllhdr *)str;
1440
+
1441
+ *size = HLL_REGISTERS;
1442
+ *elements = malloc(sizeof(long) * HLL_REGISTERS);
1443
+ if (!*elements) {
1444
+ return -3;
1445
+ }
1446
+ for (j = 0; j < HLL_REGISTERS; j++) {
1447
+ uint8_t val;
1448
+
1449
+ HLL_DENSE_GET_REGISTER(val,hdr->registers,j);
1450
+ (*elements)[j] = (long)val;
1451
+ }
1452
+ *_str = str;
1453
+ *_strlen = strlen;
1454
+ return 0;
1455
+ }
1456
+
1457
+ #define APPEND(obj, objlen, objalloc, src, srclen)\
1458
+ while (objlen + srclen > objalloc) {\
1459
+ unsigned char *tmp = realloc(obj, sizeof(unsigned char) * objalloc * 2);\
1460
+ if (!tmp) {\
1461
+ free(obj);\
1462
+ return -4;\
1463
+ }\
1464
+ obj = tmp;\
1465
+ objalloc *= 2;\
1466
+ }\
1467
+ memcpy(&obj[objlen], src, srclen);\
1468
+ objlen += srclen;
1469
+
1470
+ int rl_str_pfdebug_decode(unsigned char *str, long strlen, unsigned char **response, long *responselen) {
1471
+ struct hllhdr *hdr;
1472
+
1473
+ if (isHLLObject(str, strlen) != 0) return -1;
1474
+ hdr = (struct hllhdr *)str;
1475
+
1476
+ if (hdr->encoding != HLL_SPARSE) {
1477
+ return -2;
1478
+ }
1479
+
1480
+ uint8_t *p = str, *end = p+strlen;
1481
+ long decodedlen = 0;
1482
+ long decodedall = strlen * 10;
1483
+ unsigned char *decoded = malloc(sizeof(unsigned char) * decodedall);
1484
+ char tmp[100];
1485
+ long tmplen;
1486
+
1487
+ p += HLL_HDR_SIZE;
1488
+ while(p < end) {
1489
+ int runlen, regval;
1490
+
1491
+ if (HLL_SPARSE_IS_ZERO(p)) {
1492
+ runlen = HLL_SPARSE_ZERO_LEN(p);
1493
+ p++;
1494
+ tmplen = snprintf(tmp, 100, "z:%d ", runlen);
1495
+ APPEND(decoded, decodedlen, decodedall, tmp, tmplen);
1496
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
1497
+ runlen = HLL_SPARSE_XZERO_LEN(p);
1498
+ p += 2;
1499
+ tmplen = snprintf(tmp, 100, "Z:%d ", runlen);
1500
+ APPEND(decoded, decodedlen, decodedall, tmp, tmplen);
1501
+ } else {
1502
+ runlen = HLL_SPARSE_VAL_LEN(p);
1503
+ regval = HLL_SPARSE_VAL_VALUE(p);
1504
+ p++;
1505
+ tmplen = snprintf(tmp, 100, "v:%d,%d ",regval,runlen);
1506
+ APPEND(decoded, decodedlen, decodedall, tmp, tmplen);
1507
+ }
1508
+ }
1509
+ decodedlen--;
1510
+ decoded = realloc(decoded, sizeof(unsigned char) * (decodedlen));
1511
+ *response = decoded;
1512
+ *responselen = decodedlen;
1513
+ return 0;
1514
+ }
1515
+
1516
+ int rl_str_pfdebug_encoding(unsigned char *str, long strlen, unsigned char **response, long *responselen) {
1517
+ struct hllhdr *hdr;
1518
+
1519
+ if (isHLLObject(str, strlen) != 0) return -1;
1520
+ hdr = (struct hllhdr *)str;
1521
+
1522
+ char *encodingstr[2] = {"dense","sparse"};
1523
+
1524
+ *responselen = 5 + hdr->encoding;
1525
+ *response = malloc(sizeof(unsigned char) * (*responselen));
1526
+ memcpy(*response, encodingstr[hdr->encoding], *responselen);
1527
+ return 0;
1528
+ }
1529
+
1530
+ int rl_str_pfdebug_todense(unsigned char *str, long strlen, unsigned char **_str, long *_strlen) {
1531
+ struct hllhdr *hdr;
1532
+
1533
+ if (isHLLObject(str, strlen) != 0) return -1;
1534
+ hdr = (struct hllhdr *)str;
1535
+
1536
+ int conv = 0;
1537
+
1538
+ if (hdr->encoding == HLL_SPARSE) {
1539
+ if (hllSparseToDense(str, strlen, &str, &strlen) == 1) {
1540
+ return -2;
1541
+ }
1542
+ conv = 1;
1543
+ }
1544
+ *_str = str;
1545
+ *_strlen = strlen;
1546
+ return conv;
1547
+ }