datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -34,140 +34,133 @@ namespace datasketches {
34
34
 
35
35
  enum hll_mode { LIST = 0, SET, HLL };
36
36
 
37
+ namespace hll_constants {
38
+
39
+ // preamble stuff
40
+ static const uint8_t SER_VER = 1;
41
+ static const uint8_t FAMILY_ID = 7;
42
+
43
+ static const uint8_t EMPTY_FLAG_MASK = 4;
44
+ static const uint8_t COMPACT_FLAG_MASK = 8;
45
+ static const uint8_t OUT_OF_ORDER_FLAG_MASK = 16;
46
+ static const uint8_t FULL_SIZE_FLAG_MASK = 32;
47
+
48
+ static const uint32_t PREAMBLE_INTS_BYTE = 0;
49
+ static const uint32_t SER_VER_BYTE = 1;
50
+ static const uint32_t FAMILY_BYTE = 2;
51
+ static const uint32_t LG_K_BYTE = 3;
52
+ static const uint32_t LG_ARR_BYTE = 4;
53
+ static const uint32_t FLAGS_BYTE = 5;
54
+ static const uint32_t LIST_COUNT_BYTE = 6;
55
+ static const uint32_t HLL_CUR_MIN_BYTE = 6;
56
+ static const uint32_t MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
57
+
58
+ // Coupon List
59
+ static const uint32_t LIST_INT_ARR_START = 8;
60
+ static const uint8_t LIST_PREINTS = 2;
61
+ // Coupon Hash Set
62
+ static const uint32_t HASH_SET_COUNT_INT = 8;
63
+ static const uint32_t HASH_SET_INT_ARR_START = 12;
64
+ static const uint8_t HASH_SET_PREINTS = 3;
65
+ // HLL
66
+ static const uint8_t HLL_PREINTS = 10;
67
+ static const uint32_t HLL_BYTE_ARR_START = 40;
68
+ static const uint32_t HIP_ACCUM_DOUBLE = 8;
69
+ static const uint32_t KXQ0_DOUBLE = 16;
70
+ static const uint32_t KXQ1_DOUBLE = 24;
71
+ static const uint32_t CUR_MIN_COUNT_INT = 32;
72
+ static const uint32_t AUX_COUNT_INT = 36;
73
+
74
+ static const uint32_t EMPTY_SKETCH_SIZE_BYTES = 8;
75
+
76
+ // other HllUtil stuff
77
+ static const uint8_t KEY_BITS_26 = 26;
78
+ static const uint8_t VAL_BITS_6 = 6;
79
+ static const uint32_t KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
80
+ static const uint32_t VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
81
+ static const uint32_t EMPTY = 0;
82
+ static const uint8_t MIN_LOG_K = 4;
83
+ static const uint8_t MAX_LOG_K = 21;
84
+
85
+ static const double HLL_HIP_RSE_FACTOR = 0.8325546; // sqrt(ln(2))
86
+ static const double HLL_NON_HIP_RSE_FACTOR = 1.03896; // sqrt((3 * ln(2)) - 1)
87
+ static const double COUPON_RSE_FACTOR = 0.409; // at transition point not the asymptote
88
+ static const double COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
89
+
90
+ static const uint8_t LG_INIT_LIST_SIZE = 3;
91
+ static const uint8_t LG_INIT_SET_SIZE = 5;
92
+ static const uint32_t RESIZE_NUMER = 3;
93
+ static const uint32_t RESIZE_DENOM = 4;
94
+
95
+ static const uint8_t loNibbleMask = 0x0f;
96
+ static const uint8_t hiNibbleMask = 0xf0;
97
+ static const uint8_t AUX_TOKEN = 0xf;
98
+
99
+ /**
100
+ * Log2 table sizes for exceptions based on lgK from 0 to 26.
101
+ * However, only lgK from 4 to 21 are used.
102
+ */
103
+ static const uint8_t LG_AUX_ARR_INTS[] = {
104
+ 0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
105
+ 4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
106
+ 12, 13, 14, 15, 16, 17, 18 // 20-26
107
+ };
108
+
109
+ } // namespace hll_constants
110
+
111
+
37
112
  // template provides internal consistency and allows static float values
38
113
  // but we don't use the template parameter anywhere
39
- template<typename A = std::allocator<char> >
114
+ template<typename A = std::allocator<uint8_t> >
40
115
  class HllUtil final {
41
116
  public:
42
- // preamble stuff
43
- static const int SER_VER = 1;
44
- static const int FAMILY_ID = 7;
45
-
46
- static const int EMPTY_FLAG_MASK = 4;
47
- static const int COMPACT_FLAG_MASK = 8;
48
- static const int OUT_OF_ORDER_FLAG_MASK = 16;
49
- static const int FULL_SIZE_FLAG_MASK = 32;
50
-
51
- static const int PREAMBLE_INTS_BYTE = 0;
52
- static const int SER_VER_BYTE = 1;
53
- static const int FAMILY_BYTE = 2;
54
- static const int LG_K_BYTE = 3;
55
- static const int LG_ARR_BYTE = 4;
56
- static const int FLAGS_BYTE = 5;
57
- static const int LIST_COUNT_BYTE = 6;
58
- static const int HLL_CUR_MIN_BYTE = 6;
59
- static const int MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
60
-
61
- // Coupon List
62
- static const int LIST_INT_ARR_START = 8;
63
- static const int LIST_PREINTS = 2;
64
- // Coupon Hash Set
65
- static const int HASH_SET_COUNT_INT = 8;
66
- static const int HASH_SET_INT_ARR_START = 12;
67
- static const int HASH_SET_PREINTS = 3;
68
- // HLL
69
- static const int HLL_PREINTS = 10;
70
- static const int HLL_BYTE_ARR_START = 40;
71
- static const int HIP_ACCUM_DOUBLE = 8;
72
- static const int KXQ0_DOUBLE = 16;
73
- static const int KXQ1_DOUBLE = 24;
74
- static const int CUR_MIN_COUNT_INT = 32;
75
- static const int AUX_COUNT_INT = 36;
76
-
77
- static const int EMPTY_SKETCH_SIZE_BYTES = 8;
78
-
79
- // other HllUtil stuff
80
- static const int KEY_BITS_26 = 26;
81
- static const int VAL_BITS_6 = 6;
82
- static const int KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
83
- static const int VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
84
- static const int EMPTY = 0;
85
- static const int MIN_LOG_K = 4;
86
- static const int MAX_LOG_K = 21;
87
-
88
- static const double HLL_HIP_RSE_FACTOR; // sqrt(log(2.0)) = 0.8325546
89
- static const double HLL_NON_HIP_RSE_FACTOR; // sqrt((3.0 * log(2.0)) - 1.0) = 1.03896
90
- static const double COUPON_RSE_FACTOR; // 0.409 at transition point not the asymptote
91
- static const double COUPON_RSE; // COUPON_RSE_FACTOR / (1 << 13);
92
-
93
- static const int LG_INIT_LIST_SIZE = 3;
94
- static const int LG_INIT_SET_SIZE = 5;
95
- static const int RESIZE_NUMER = 3;
96
- static const int RESIZE_DENOM = 4;
97
-
98
- static const int loNibbleMask = 0x0f;
99
- static const int hiNibbleMask = 0xf0;
100
- static const int AUX_TOKEN = 0xf;
101
-
102
- /**
103
- * Log2 table sizes for exceptions based on lgK from 0 to 26.
104
- * However, only lgK from 4 to 21 are used.
105
- */
106
- static const int LG_AUX_ARR_INTS[];
107
-
108
- static int coupon(const uint64_t hash[]);
109
- static int coupon(const HashState& hashState);
110
- static void hash(const void* key, int keyLen, uint64_t seed, HashState& result);
111
- static int checkLgK(int lgK);
117
+
118
+ static uint32_t coupon(const uint64_t hash[]);
119
+ static uint32_t coupon(const HashState& hashState);
120
+ static void hash(const void* key, size_t keyLen, uint64_t seed, HashState& result);
121
+ static uint8_t checkLgK(uint8_t lgK);
112
122
  static void checkMemSize(uint64_t minBytes, uint64_t capBytes);
113
- static inline void checkNumStdDev(int numStdDev);
114
- static int pair(int slotNo, int value);
115
- static int getLow26(unsigned int coupon);
116
- static int getValue(unsigned int coupon);
117
- static double invPow2(int e);
118
- static unsigned int ceilingPowerOf2(unsigned int n);
119
- static unsigned int simpleIntLog2(unsigned int n); // n must be power of 2
120
- static int computeLgArrInts(hll_mode mode, int count, int lgConfigK);
121
- static double getRelErr(bool upperBound, bool unioned,
122
- int lgConfigK, int numStdDev);
123
+ static inline void checkNumStdDev(uint8_t numStdDev);
124
+ static uint32_t pair(uint32_t slotNo, uint8_t value);
125
+ static uint32_t getLow26(uint32_t coupon);
126
+ static uint8_t getValue(uint32_t coupon);
127
+ static double invPow2(uint8_t e);
128
+ static uint8_t ceilingPowerOf2(uint32_t n);
129
+ static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
130
+ static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
131
+ static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
123
132
  };
124
133
 
125
134
  template<typename A>
126
- const double HllUtil<A>::HLL_HIP_RSE_FACTOR = sqrt(log(2.0)); // 0.8325546
127
- template<typename A>
128
- const double HllUtil<A>::HLL_NON_HIP_RSE_FACTOR = sqrt((3.0 * log(2.0)) - 1.0); // 1.03896
129
- template<typename A>
130
- const double HllUtil<A>::COUPON_RSE_FACTOR = 0.409;
131
- template<typename A>
132
- const double HllUtil<A>::COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
133
-
134
- template<typename A>
135
- const int HllUtil<A>::LG_AUX_ARR_INTS[] = {
136
- 0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
137
- 4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
138
- 12, 13, 14, 15, 16, 17, 18 // 20-26
139
- };
140
-
141
- template<typename A>
142
- inline int HllUtil<A>::coupon(const uint64_t hash[]) {
143
- int addr26 = (int) (hash[0] & KEY_MASK_26);
144
- int lz = count_leading_zeros_in_u64(hash[1]);
145
- int value = ((lz > 62 ? 62 : lz) + 1);
146
- return (value << KEY_BITS_26) | addr26;
135
+ inline uint32_t HllUtil<A>::coupon(const uint64_t hash[]) {
136
+ uint32_t addr26 = hash[0] & hll_constants::KEY_MASK_26;
137
+ uint8_t lz = count_leading_zeros_in_u64(hash[1]);
138
+ uint8_t value = ((lz > 62 ? 62 : lz) + 1);
139
+ return (value << hll_constants::KEY_BITS_26) | addr26;
147
140
  }
148
141
 
149
142
  template<typename A>
150
- inline int HllUtil<A>::coupon(const HashState& hashState) {
151
- int addr26 = (int) (hashState.h1 & KEY_MASK_26);
152
- int lz = count_leading_zeros_in_u64(hashState.h2);
153
- int value = ((lz > 62 ? 62 : lz) + 1);
154
- return (value << KEY_BITS_26) | addr26;
143
+ inline uint32_t HllUtil<A>::coupon(const HashState& hashState) {
144
+ uint32_t addr26 = (int) (hashState.h1 & hll_constants::KEY_MASK_26);
145
+ uint8_t lz = count_leading_zeros_in_u64(hashState.h2);
146
+ uint8_t value = ((lz > 62 ? 62 : lz) + 1);
147
+ return (value << hll_constants::KEY_BITS_26) | addr26;
155
148
  }
156
149
 
157
150
  template<typename A>
158
- inline void HllUtil<A>::hash(const void* key, const int keyLen, const uint64_t seed, HashState& result) {
151
+ inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, HashState& result) {
159
152
  MurmurHash3_x64_128(key, keyLen, seed, result);
160
153
  }
161
154
 
162
155
  template<typename A>
163
- inline double HllUtil<A>::getRelErr(const bool upperBound, const bool unioned,
164
- const int lgConfigK, const int numStdDev) {
156
+ inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
157
+ uint8_t lgConfigK, uint8_t numStdDev) {
165
158
  return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
166
159
  }
167
160
 
168
161
  template<typename A>
169
- inline int HllUtil<A>::checkLgK(const int lgK) {
170
- if ((lgK >= HllUtil<A>::MIN_LOG_K) && (lgK <= HllUtil<A>::MAX_LOG_K)) {
162
+ inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
163
+ if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
171
164
  return lgK;
172
165
  } else {
173
166
  throw std::invalid_argument("Invalid value of k: " + std::to_string(lgK));
@@ -175,36 +168,36 @@ inline int HllUtil<A>::checkLgK(const int lgK) {
175
168
  }
176
169
 
177
170
  template<typename A>
178
- inline void HllUtil<A>::checkMemSize(const uint64_t minBytes, const uint64_t capBytes) {
171
+ inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
179
172
  if (capBytes < minBytes) {
180
173
  throw std::invalid_argument("Given destination array is not large enough: " + std::to_string(capBytes));
181
174
  }
182
175
  }
183
176
 
184
177
  template<typename A>
185
- inline void HllUtil<A>::checkNumStdDev(const int numStdDev) {
178
+ inline void HllUtil<A>::checkNumStdDev(uint8_t numStdDev) {
186
179
  if ((numStdDev < 1) || (numStdDev > 3)) {
187
180
  throw std::invalid_argument("NumStdDev may not be less than 1 or greater than 3.");
188
181
  }
189
182
  }
190
183
 
191
184
  template<typename A>
192
- inline int HllUtil<A>::pair(const int slotNo, const int value) {
193
- return (value << HllUtil<A>::KEY_BITS_26) | (slotNo & HllUtil<A>::KEY_MASK_26);
185
+ inline uint32_t HllUtil<A>::pair(uint32_t slotNo, uint8_t value) {
186
+ return (value << hll_constants::KEY_BITS_26) | (slotNo & hll_constants::KEY_MASK_26);
194
187
  }
195
188
 
196
189
  template<typename A>
197
- inline int HllUtil<A>::getLow26(const unsigned int coupon) {
198
- return coupon & HllUtil<A>::KEY_MASK_26;
190
+ inline uint32_t HllUtil<A>::getLow26(uint32_t coupon) {
191
+ return coupon & hll_constants::KEY_MASK_26;
199
192
  }
200
193
 
201
194
  template<typename A>
202
- inline int HllUtil<A>::getValue(const unsigned int coupon) {
203
- return coupon >> HllUtil<A>::KEY_BITS_26;
195
+ inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
196
+ return coupon >> hll_constants::KEY_BITS_26;
204
197
  }
205
198
 
206
199
  template<typename A>
207
- inline double HllUtil<A>::invPow2(const int e) {
200
+ inline double HllUtil<A>::invPow2(uint8_t e) {
208
201
  union {
209
202
  long long longVal;
210
203
  double doubleVal;
@@ -214,7 +207,7 @@ inline double HllUtil<A>::invPow2(const int e) {
214
207
  }
215
208
 
216
209
  template<typename A>
217
- inline uint32_t HllUtil<A>::simpleIntLog2(uint32_t n) {
210
+ inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
218
211
  if (n == 0) {
219
212
  throw std::logic_error("cannot take log of 0");
220
213
  }
@@ -222,16 +215,16 @@ inline uint32_t HllUtil<A>::simpleIntLog2(uint32_t n) {
222
215
  }
223
216
 
224
217
  template<typename A>
225
- inline int HllUtil<A>::computeLgArrInts(hll_mode mode, int count, int lgConfigK) {
218
+ inline uint8_t HllUtil<A>::computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK) {
226
219
  // assume value missing and recompute
227
- if (mode == LIST) { return HllUtil<A>::LG_INIT_LIST_SIZE; }
228
- int ceilPwr2 = ceiling_power_of_2(count);
229
- if ((HllUtil<A>::RESIZE_DENOM * count) > (HllUtil<A>::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
220
+ if (mode == LIST) { return hll_constants::LG_INIT_LIST_SIZE; }
221
+ uint32_t ceilPwr2 = ceiling_power_of_2(count);
222
+ if ((hll_constants::RESIZE_DENOM * count) > (hll_constants::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
230
223
  if (mode == SET) {
231
- return fmax(HllUtil<A>::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
224
+ return std::max(hll_constants::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
232
225
  }
233
226
  //only used for HLL4
234
- return fmax(HllUtil<A>::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
227
+ return std::max(hll_constants::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
235
228
  }
236
229
 
237
230
  }
@@ -24,7 +24,7 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A = std::allocator<uint8_t>>
28
28
  class RelativeErrorTables {
29
29
  public:
30
30
  /**
@@ -25,30 +25,30 @@
25
25
  namespace datasketches {
26
26
 
27
27
  template<typename A>
28
- coupon_iterator<A>::coupon_iterator(const int* array, size_t array_size, size_t index, bool all):
29
- array(array), array_size(array_size), index(index), all(all) {
30
- while (this->index < array_size) {
31
- if (all || array[this->index] != HllUtil<A>::EMPTY) break;
32
- this->index++;
28
+ coupon_iterator<A>::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all):
29
+ array_(array), array_size_(array_size), index_(index), all_(all) {
30
+ while (index_ < array_size_) {
31
+ if (all_ || array_[index_] != hll_constants::EMPTY) break;
32
+ ++index_;
33
33
  }
34
34
  }
35
35
 
36
36
  template<typename A>
37
37
  coupon_iterator<A>& coupon_iterator<A>::operator++() {
38
- while (++index < array_size) {
39
- if (all || array[index] != HllUtil<A>::EMPTY) break;
38
+ while (++index_ < array_size_) {
39
+ if (all_ || array_[index_] != hll_constants::EMPTY) break;
40
40
  }
41
41
  return *this;
42
42
  }
43
43
 
44
44
  template<typename A>
45
45
  bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
46
- return index != other.index;
46
+ return index_ != other.index_;
47
47
  }
48
48
 
49
49
  template<typename A>
50
50
  uint32_t coupon_iterator<A>::operator*() const {
51
- return array[index];
51
+ return array_[index_];
52
52
  }
53
53
 
54
54
  }
@@ -25,15 +25,15 @@ namespace datasketches {
25
25
  template<typename A>
26
26
  class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
27
27
  public:
28
- coupon_iterator(const int* array, size_t array_slze, size_t index, bool all);
28
+ coupon_iterator(const uint32_t* array, size_t array_slze, size_t index, bool all);
29
29
  coupon_iterator& operator++();
30
30
  bool operator!=(const coupon_iterator& other) const;
31
31
  uint32_t operator*() const;
32
32
  private:
33
- const int* array;
34
- size_t array_size;
35
- size_t index;
36
- bool all;
33
+ const uint32_t* array_;
34
+ size_t array_size_;
35
+ size_t index_;
36
+ bool all_;
37
37
  };
38
38
 
39
39
  }
@@ -108,7 +108,7 @@ class hll_union_alloc;
108
108
  template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
109
109
  template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
110
110
 
111
- template<typename A = std::allocator<char> >
111
+ template<typename A = std::allocator<uint8_t> >
112
112
  class hll_sketch_alloc final {
113
113
  public:
114
114
  /**
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
119
119
  * keeping memory use constant (if HLL_6 or HLL_8) at the cost of
120
120
  * starting out using much more memory
121
121
  */
122
- explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false);
122
+ explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
123
123
 
124
124
  /**
125
125
  * Copy constructor
@@ -140,14 +140,14 @@ class hll_sketch_alloc final {
140
140
  * Reconstructs a sketch from a serialized image on a stream.
141
141
  * @param is An input stream with a binary image of a sketch
142
142
  */
143
- static hll_sketch_alloc deserialize(std::istream& is);
143
+ static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
144
144
 
145
145
  /**
146
146
  * Reconstructs a sketch from a serialized image in a byte array.
147
147
  * @param bytes An input array with a binary image of a sketch
148
148
  * @param len Length of the input array, in bytes
149
149
  */
150
- static hll_sketch_alloc deserialize(const void* bytes, size_t len);
150
+ static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
151
151
 
152
152
  //! Class destructor
153
153
  virtual ~hll_sketch_alloc();
@@ -306,7 +306,7 @@ class hll_sketch_alloc final {
306
306
  * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
307
307
  * @return The approximate lower bound.
308
308
  */
309
- double get_lower_bound(int num_std_dev) const;
309
+ double get_lower_bound(uint8_t num_std_dev) const;
310
310
 
311
311
  /**
312
312
  * Returns the approximate upper error bound given the specified
@@ -314,13 +314,13 @@ class hll_sketch_alloc final {
314
314
  * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
315
315
  * @return The approximate upper bound.
316
316
  */
317
- double get_upper_bound(int num_std_dev) const;
317
+ double get_upper_bound(uint8_t num_std_dev) const;
318
318
 
319
319
  /**
320
320
  * Returns sketch's configured lg_k value.
321
321
  * @return Configured lg_k value.
322
322
  */
323
- int get_lg_config_k() const;
323
+ uint8_t get_lg_config_k() const;
324
324
 
325
325
  /**
326
326
  * Returns the sketch's target HLL mode (from #target_hll_type).
@@ -344,13 +344,13 @@ class hll_sketch_alloc final {
344
344
  * Returns the size of the sketch serialized in compact form.
345
345
  * @return Size of the sketch serialized in compact form, in bytes.
346
346
  */
347
- int get_compact_serialization_bytes() const;
347
+ uint32_t get_compact_serialization_bytes() const;
348
348
 
349
349
  /**
350
350
  * Returns the size of the sketch serialized without compaction.
351
351
  * @return Size of the sketch serialized without compaction, in bytes.
352
352
  */
353
- int get_updatable_serialization_bytes() const;
353
+ uint32_t get_updatable_serialization_bytes() const;
354
354
 
355
355
  /**
356
356
  * Returns the maximum size in bytes that this sketch can grow to
@@ -363,7 +363,7 @@ class hll_sketch_alloc final {
363
363
  * @param tgt_type the desired Hll type
364
364
  * @return the maximum size in bytes that this sketch can grow to.
365
365
  */
366
- static int get_max_updatable_serialization_bytes(int lg_k, target_hll_type tgt_type);
366
+ static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
367
367
 
368
368
  /**
369
369
  * Gets the current (approximate) Relative Error (RE) asymptotic values given several
@@ -376,18 +376,18 @@ class hll_sketch_alloc final {
376
376
  * @return the current (approximate) RelativeError
377
377
  */
378
378
  static double get_rel_err(bool upper_bound, bool unioned,
379
- int lg_config_k, int num_std_dev);
379
+ uint8_t lg_config_k, uint8_t num_std_dev);
380
380
 
381
381
  private:
382
382
  explicit hll_sketch_alloc(HllSketchImpl<A>* that);
383
383
 
384
- void coupon_update(int coupon);
384
+ void coupon_update(uint32_t coupon);
385
385
 
386
386
  std::string type_as_string() const;
387
387
  std::string mode_as_string() const;
388
388
 
389
389
  hll_mode get_current_mode() const;
390
- int get_serialization_version() const;
390
+ uint8_t get_serialization_version() const;
391
391
  bool is_out_of_order_flag() const;
392
392
  bool is_estimation_mode() const;
393
393
 
@@ -423,7 +423,7 @@ class hll_sketch_alloc final {
423
423
  * author Kevin Lang
424
424
  */
425
425
 
426
- template<typename A = std::allocator<char> >
426
+ template<typename A = std::allocator<uint8_t> >
427
427
  class hll_union_alloc {
428
428
  public:
429
429
  /**
@@ -431,7 +431,7 @@ class hll_union_alloc {
431
431
  * @param lg_max_k The maximum size, in log2, of k. The value must
432
432
  * be between 7 and 21, inclusive.
433
433
  */
434
- explicit hll_union_alloc(int lg_max_k);
434
+ explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
435
435
 
436
436
  /**
437
437
  * Returns the current cardinality estimate
@@ -458,7 +458,7 @@ class hll_union_alloc {
458
458
  * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
459
459
  * @return The approximate lower bound.
460
460
  */
461
- double get_lower_bound(int num_std_dev) const;
461
+ double get_lower_bound(uint8_t num_std_dev) const;
462
462
 
463
463
  /**
464
464
  * Returns the approximate upper error bound given the specified
@@ -466,25 +466,13 @@ class hll_union_alloc {
466
466
  * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
467
467
  * @return The approximate upper bound.
468
468
  */
469
- double get_upper_bound(int num_std_dev) const;
470
-
471
- /**
472
- * Returns the size of the union serialized in compact form.
473
- * @return Size of the union serialized in compact form, in bytes.
474
- */
475
- int get_compact_serialization_bytes() const;
476
-
477
- /**
478
- * Returns the size of the union serialized without compaction.
479
- * @return Size of the union serialized without compaction, in bytes.
480
- */
481
- int get_updatable_serialization_bytes() const;
469
+ double get_upper_bound(uint8_t num_std_dev) const;
482
470
 
483
471
  /**
484
472
  * Returns union's configured lg_k value.
485
473
  * @return Configured lg_k value.
486
474
  */
487
- int get_lg_config_k() const;
475
+ uint8_t get_lg_config_k() const;
488
476
 
489
477
  /**
490
478
  * Returns the union's target HLL mode (from #target_hll_type).
@@ -492,12 +480,6 @@ class hll_union_alloc {
492
480
  */
493
481
  target_hll_type get_target_type() const;
494
482
 
495
- /**
496
- * Indicates if the union is currently stored compacted.
497
- * @return True if the union is stored in compact form.
498
- */
499
- bool is_compact() const;
500
-
501
483
  /**
502
484
  * Indicates if the union is currently empty.
503
485
  * @return True if the union is empty.
@@ -605,15 +587,6 @@ class hll_union_alloc {
605
587
  */
606
588
  void update(const void* data, size_t length_bytes);
607
589
 
608
- /**
609
- * Returns the maximum size in bytes that this union operator can grow to given a lg_k.
610
- *
611
- * @param lg_k The maximum Log2 of k for this union operator. This value must be
612
- * between 4 and 21 inclusively.
613
- * @return the maximum size in bytes that this union operator can grow to.
614
- */
615
- static int get_max_serialization_bytes(int lg_k);
616
-
617
590
  /**
618
591
  * Gets the current (approximate) Relative Error (RE) asymptotic values given several
619
592
  * parameters. This is used primarily for testing.
@@ -625,7 +598,7 @@ class hll_union_alloc {
625
598
  * @return the current (approximate) RelativeError
626
599
  */
627
600
  static double get_rel_err(bool upper_bound, bool unioned,
628
- int lg_config_k, int num_std_dev);
601
+ uint8_t lg_config_k, uint8_t num_std_dev);
629
602
 
630
603
  private:
631
604
 
@@ -638,22 +611,21 @@ class hll_union_alloc {
638
611
  * @param sketch the given incoming sketch, which may not be modified.
639
612
  * @param lg_max_k the maximum value of log2 K for this union.
640
613
  */
641
- inline void union_impl(const hll_sketch_alloc<A>& sketch, int lg_max_k);
614
+ inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
642
615
 
643
- static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, int tgt_lg_k);
616
+ static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
644
617
 
645
- void coupon_update(int coupon);
618
+ void coupon_update(uint32_t coupon);
646
619
 
647
620
  hll_mode get_current_mode() const;
648
- int get_serialization_version() const;
649
621
  bool is_out_of_order_flag() const;
650
622
  bool is_estimation_mode() const;
651
623
 
652
624
  // calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
653
- static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, int coupon);
625
+ static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
654
626
 
655
- int lg_max_k;
656
- hll_sketch_alloc<A> gadget;
627
+ uint8_t lg_max_k_;
628
+ hll_sketch_alloc<A> gadget_;
657
629
  };
658
630
 
659
631
  /// convenience alias for hll_sketch with default allocator
@@ -25,7 +25,7 @@
25
25
  namespace datasketches {
26
26
 
27
27
  TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") {
28
- AuxHashMap<>* map = new AuxHashMap<>(3, 7);
28
+ AuxHashMap<std::allocator<uint8_t>>* map = new AuxHashMap<std::allocator<uint8_t>>(3, 7, std::allocator<uint8_t>());
29
29
  map->mustAdd(100, 5);
30
30
  int val = map->mustFindValueFor(100);
31
31
  REQUIRE(val == 5);
@@ -40,12 +40,12 @@ TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") {
40
40
  }
41
41
 
42
42
  TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
43
- auto map = std::unique_ptr<AuxHashMap<>, std::function<void(AuxHashMap<>*)>>(
44
- AuxHashMap<>::newAuxHashMap(3, 7),
45
- AuxHashMap<>::make_deleter()
43
+ auto map = std::unique_ptr<AuxHashMap<std::allocator<uint8_t>>, std::function<void(AuxHashMap<std::allocator<uint8_t>>*)>>(
44
+ AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>()),
45
+ AuxHashMap<std::allocator<uint8_t>>::make_deleter()
46
46
  );
47
47
  REQUIRE(map->getLgAuxArrInts() == 3);
48
- for (int i = 1; i <= 7; ++i) {
48
+ for (uint8_t i = 1; i <= 7; ++i) {
49
49
  map->mustAdd(i, i);
50
50
  }
51
51
  REQUIRE(map->getLgAuxArrInts() == 4);
@@ -63,17 +63,17 @@ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
63
63
  }
64
64
 
65
65
  TEST_CASE("aux hash map: check exception must find value for", "[aux_hash_map]") {
66
- AuxHashMap<> map(3, 7);
66
+ AuxHashMap<std::allocator<uint8_t>> map(3, 7, std::allocator<uint8_t>());
67
67
  map.mustAdd(100, 5);
68
68
  REQUIRE_THROWS_AS(map.mustFindValueFor(101), std::invalid_argument);
69
69
  }
70
70
 
71
71
  TEST_CASE("aux hash map: check exception must add", "[aux_hash_map]") {
72
- AuxHashMap<>* map = AuxHashMap<>::newAuxHashMap(3, 7);
72
+ AuxHashMap<std::allocator<uint8_t>>* map = AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>());
73
73
  map->mustAdd(100, 5);
74
74
  REQUIRE_THROWS_AS(map->mustAdd(100, 6), std::invalid_argument);
75
75
 
76
- AuxHashMap<>::make_deleter()(map);
76
+ AuxHashMap<std::allocator<uint8_t>>::make_deleter()(map);
77
77
  }
78
78
 
79
79
  } /* namespace datasketches */