datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -35,51 +35,19 @@
35
35
  namespace datasketches {
36
36
 
37
37
  template<typename A>
38
- HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
39
- : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
40
- hipAccum = 0.0;
41
- kxq0 = 1 << lgConfigK;
42
- kxq1 = 0.0;
43
- curMin = 0;
44
- numAtCurMin = 1 << lgConfigK;
45
- oooFlag = false;
46
- hllByteArr = nullptr; // allocated in derived class
47
- }
48
-
49
- template<typename A>
50
- HllArray<A>::HllArray(const HllArray<A>& that):
51
- HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
52
- hipAccum(that.hipAccum),
53
- kxq0(that.kxq0),
54
- kxq1(that.kxq1),
55
- hllByteArr(nullptr),
56
- curMin(that.curMin),
57
- numAtCurMin(that.numAtCurMin),
58
- oooFlag(that.oooFlag)
59
- {
60
- const int arrayLen = that.getHllByteArrBytes();
61
- typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
62
- hllByteArr = uint8Alloc().allocate(arrayLen);
63
- std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
64
- }
65
-
66
- template<typename A>
67
- HllArray<A>::~HllArray() {
68
- // need to determine number of bytes to deallocate
69
- int hllArrBytes = 0;
70
- if (this->tgtHllType == target_hll_type::HLL_4) {
71
- hllArrBytes = hll4ArrBytes(this->lgConfigK);
72
- } else if (this->tgtHllType == target_hll_type::HLL_6) {
73
- hllArrBytes = hll6ArrBytes(this->lgConfigK);
74
- } else { // tgtHllType == HLL_8
75
- hllArrBytes = hll8ArrBytes(this->lgConfigK);
76
- }
77
- typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
78
- uint8Alloc().deallocate(hllByteArr, hllArrBytes);
79
- }
80
-
81
- template<typename A>
82
- HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
38
+ HllArray<A>::HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator):
39
+ HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize),
40
+ hipAccum_(0.0),
41
+ kxq0_(1 << lgConfigK),
42
+ kxq1_(0.0),
43
+ hllByteArr_(allocator),
44
+ curMin_(0),
45
+ numAtCurMin_(1 << lgConfigK),
46
+ oooFlag_(false)
47
+ {}
48
+
49
+ template<typename A>
50
+ HllArray<A>* HllArray<A>::copyAs(target_hll_type tgtHllType) const {
83
51
  if (tgtHllType == this->getTgtHllType()) {
84
52
  return static_cast<HllArray*>(copy());
85
53
  }
@@ -93,61 +61,61 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
93
61
  }
94
62
 
95
63
  template<typename A>
96
- HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
97
- if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
64
+ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) {
65
+ if (len < hll_constants::HLL_BYTE_ARR_START) {
98
66
  throw std::out_of_range("Input data length insufficient to hold HLL array");
99
67
  }
100
68
 
101
69
  const uint8_t* data = static_cast<const uint8_t*>(bytes);
102
- if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
70
+ if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
103
71
  throw std::invalid_argument("Incorrect number of preInts in input stream");
104
72
  }
105
- if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
73
+ if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
106
74
  throw std::invalid_argument("Wrong ser ver in input stream");
107
75
  }
108
- if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
76
+ if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
109
77
  throw std::invalid_argument("Input array is not an HLL sketch");
110
78
  }
111
79
 
112
- const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
80
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
113
81
  if (mode != HLL) {
114
- throw std::invalid_argument("Calling HLL array construtor with non-HLL mode data");
82
+ throw std::invalid_argument("Calling HLL array constructor with non-HLL mode data");
115
83
  }
116
84
 
117
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
118
- const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
119
- const bool comapctFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
120
- const bool startFullSizeFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
85
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
86
+ const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
87
+ const bool comapctFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
88
+ const bool startFullSizeFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
121
89
 
122
- const int lgK = (int) data[HllUtil<A>::LG_K_BYTE];
123
- const int curMin = (int) data[HllUtil<A>::HLL_CUR_MIN_BYTE];
90
+ const uint8_t lgK = data[hll_constants::LG_K_BYTE];
91
+ const uint8_t curMin = data[hll_constants::HLL_CUR_MIN_BYTE];
124
92
 
125
- const int arrayBytes = hllArrBytes(tgtHllType, lgK);
126
- if (len < static_cast<size_t>(HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes)) {
93
+ const uint32_t arrayBytes = hllArrBytes(tgtHllType, lgK);
94
+ if (len < static_cast<size_t>(hll_constants::HLL_BYTE_ARR_START + arrayBytes)) {
127
95
  throw std::out_of_range("Input array too small to hold sketch image");
128
96
  }
129
97
 
130
98
  double hip, kxq0, kxq1;
131
- std::memcpy(&hip, data + HllUtil<A>::HIP_ACCUM_DOUBLE, sizeof(double));
132
- std::memcpy(&kxq0, data + HllUtil<A>::KXQ0_DOUBLE, sizeof(double));
133
- std::memcpy(&kxq1, data + HllUtil<A>::KXQ1_DOUBLE, sizeof(double));
99
+ std::memcpy(&hip, data + hll_constants::HIP_ACCUM_DOUBLE, sizeof(double));
100
+ std::memcpy(&kxq0, data + hll_constants::KXQ0_DOUBLE, sizeof(double));
101
+ std::memcpy(&kxq1, data + hll_constants::KXQ1_DOUBLE, sizeof(double));
134
102
 
135
- int numAtCurMin, auxCount;
136
- std::memcpy(&numAtCurMin, data + HllUtil<A>::CUR_MIN_COUNT_INT, sizeof(int));
137
- std::memcpy(&auxCount, data + HllUtil<A>::AUX_COUNT_INT, sizeof(int));
103
+ uint32_t numAtCurMin, auxCount;
104
+ std::memcpy(&numAtCurMin, data + hll_constants::CUR_MIN_COUNT_INT, sizeof(int));
105
+ std::memcpy(&auxCount, data + hll_constants::AUX_COUNT_INT, sizeof(int));
138
106
 
139
107
  AuxHashMap<A>* auxHashMap = nullptr;
140
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
141
109
  aux_hash_map_ptr aux_ptr;
142
110
  if (auxCount > 0) { // necessarily TgtHllType == HLL_4
143
- int auxLgIntArrSize = (int) data[4];
144
- const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
111
+ uint8_t auxLgIntArrSize = data[4];
112
+ const size_t offset = hll_constants::HLL_BYTE_ARR_START + arrayBytes;
145
113
  const uint8_t* auxDataStart = data + offset;
146
- auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
114
+ auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
147
115
  aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
148
116
  }
149
117
 
150
- HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
118
+ HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
151
119
  sketch->putCurMin(curMin);
152
120
  sketch->putOutOfOrderFlag(oooFlag);
153
121
  if (!oooFlag) sketch->putHipAccum(hip);
@@ -155,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
155
123
  sketch->putKxQ1(kxq1);
156
124
  sketch->putNumAtCurMin(numAtCurMin);
157
125
 
158
- std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
126
+ std::memcpy(sketch->hllByteArr_.data(), data + hll_constants::HLL_BYTE_ARR_START, arrayBytes);
159
127
 
160
128
  if (auxHashMap != nullptr)
161
129
  ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
@@ -165,57 +133,55 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
165
133
  }
166
134
 
167
135
  template<typename A>
168
- HllArray<A>* HllArray<A>::newHll(std::istream& is) {
136
+ HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
169
137
  uint8_t listHeader[8];
170
- is.read((char*)listHeader, 8 * sizeof(uint8_t));
138
+ read(is, listHeader, 8 * sizeof(uint8_t));
171
139
 
172
- if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
140
+ if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
173
141
  throw std::invalid_argument("Incorrect number of preInts in input stream");
174
142
  }
175
- if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
143
+ if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
176
144
  throw std::invalid_argument("Wrong ser ver in input stream");
177
145
  }
178
- if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
146
+ if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
179
147
  throw std::invalid_argument("Input stream is not an HLL sketch");
180
148
  }
181
149
 
182
- hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
150
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
183
151
  if (mode != HLL) {
184
152
  throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
185
153
  }
186
154
 
187
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
188
- const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
189
- const bool comapctFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
190
- const bool startFullSizeFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
155
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
156
+ const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
157
+ const bool comapctFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
158
+ const bool startFullSizeFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
191
159
 
192
- const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
193
- const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
160
+ const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
161
+ const uint8_t curMin = listHeader[hll_constants::HLL_CUR_MIN_BYTE];
194
162
 
195
- HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
163
+ HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
196
164
  typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
197
165
  hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
198
166
  sketch->putCurMin(curMin);
199
167
  sketch->putOutOfOrderFlag(oooFlag);
200
168
 
201
- double hip, kxq0, kxq1;
202
- is.read((char*)&hip, sizeof(hip));
203
- is.read((char*)&kxq0, sizeof(kxq0));
204
- is.read((char*)&kxq1, sizeof(kxq1));
169
+ const auto hip = read<double>(is);
170
+ const auto kxq0 = read<double>(is);
171
+ const auto kxq1 = read<double>(is);
205
172
  if (!oooFlag) sketch->putHipAccum(hip);
206
173
  sketch->putKxQ0(kxq0);
207
174
  sketch->putKxQ1(kxq1);
208
175
 
209
- int numAtCurMin, auxCount;
210
- is.read((char*)&numAtCurMin, sizeof(numAtCurMin));
211
- is.read((char*)&auxCount, sizeof(auxCount));
176
+ const auto numAtCurMin = read<uint32_t>(is);
177
+ const auto auxCount = read<uint32_t>(is);
212
178
  sketch->putNumAtCurMin(numAtCurMin);
213
179
 
214
- is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
180
+ read(is, sketch->hllByteArr_.data(), sketch->getHllByteArrBytes());
215
181
 
216
182
  if (auxCount > 0) { // necessarily TgtHllType == HLL_4
217
- int auxLgIntArrSize = listHeader[4];
218
- AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
183
+ uint8_t auxLgIntArrSize = listHeader[4];
184
+ AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
219
185
  ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
220
186
  }
221
187
 
@@ -228,35 +194,35 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
228
194
  template<typename A>
229
195
  vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
230
196
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
231
- vector_u8<A> byteArr(sketchSizeBytes);
197
+ vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
232
198
  uint8_t* bytes = byteArr.data() + header_size_bytes;
233
199
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
234
200
 
235
- bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
236
- bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
237
- bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
238
- bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
239
- bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
240
- bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
241
- bytes[HllUtil<A>::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin);
242
- bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
243
-
244
- std::memcpy(bytes + HllUtil<A>::HIP_ACCUM_DOUBLE, &hipAccum, sizeof(double));
245
- std::memcpy(bytes + HllUtil<A>::KXQ0_DOUBLE, &kxq0, sizeof(double));
246
- std::memcpy(bytes + HllUtil<A>::KXQ1_DOUBLE, &kxq1, sizeof(double));
247
- std::memcpy(bytes + HllUtil<A>::CUR_MIN_COUNT_INT, &numAtCurMin, sizeof(int));
248
- const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
249
- std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
250
-
251
- const int hllByteArrBytes = getHllByteArrBytes();
252
- std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
201
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
202
+ bytes[hll_constants::SER_VER_BYTE] = static_cast<uint8_t>(hll_constants::SER_VER);
203
+ bytes[hll_constants::FAMILY_BYTE] = static_cast<uint8_t>(hll_constants::FAMILY_ID);
204
+ bytes[hll_constants::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK_);
205
+ bytes[hll_constants::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
206
+ bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
207
+ bytes[hll_constants::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin_);
208
+ bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
209
+
210
+ std::memcpy(bytes + hll_constants::HIP_ACCUM_DOUBLE, &hipAccum_, sizeof(double));
211
+ std::memcpy(bytes + hll_constants::KXQ0_DOUBLE, &kxq0_, sizeof(double));
212
+ std::memcpy(bytes + hll_constants::KXQ1_DOUBLE, &kxq1_, sizeof(double));
213
+ std::memcpy(bytes + hll_constants::CUR_MIN_COUNT_INT, &numAtCurMin_, sizeof(uint32_t));
214
+ const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
215
+ std::memcpy(bytes + hll_constants::AUX_COUNT_INT, &auxCount, sizeof(uint32_t));
216
+
217
+ const uint32_t hllByteArrBytes = getHllByteArrBytes();
218
+ std::memcpy(bytes + getMemDataStart(), hllByteArr_.data(), hllByteArrBytes);
253
219
 
254
220
  // aux map if HLL_4
255
- if (this->tgtHllType == HLL_4) {
221
+ if (this->tgtHllType_ == HLL_4) {
256
222
  bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
257
223
  if (auxHashMap != nullptr) {
258
224
  if (compact) {
259
- for (uint32_t coupon: *auxHashMap) {
225
+ for (const uint32_t coupon: *auxHashMap) {
260
226
  std::memcpy(bytes, &coupon, sizeof(coupon));
261
227
  bytes += sizeof(coupon);
262
228
  }
@@ -265,8 +231,8 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
265
231
  }
266
232
  } else if (!compact) {
267
233
  // if updatable, we write even if currently unused so the binary can be wrapped
268
- int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
269
- std::fill_n(bytes, auxBytes, 0);
234
+ uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
235
+ std::fill_n(bytes, auxBytes, static_cast<uint8_t>(0));
270
236
  }
271
237
  }
272
238
 
@@ -274,64 +240,63 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
274
240
  }
275
241
 
276
242
  template<typename A>
277
- void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
243
+ void HllArray<A>::serialize(std::ostream& os, bool compact) const {
278
244
  // header
279
- const uint8_t preInts(getPreInts());
280
- os.write((char*)&preInts, sizeof(preInts));
281
- const uint8_t serialVersion(HllUtil<A>::SER_VER);
282
- os.write((char*)&serialVersion, sizeof(serialVersion));
283
- const uint8_t familyId(HllUtil<A>::FAMILY_ID);
284
- os.write((char*)&familyId, sizeof(familyId));
285
- const uint8_t lgKByte((uint8_t) this->lgConfigK);
286
- os.write((char*)&lgKByte, sizeof(lgKByte));
245
+ const uint8_t preInts = getPreInts();
246
+ write(os, preInts);
247
+ const uint8_t serialVersion = hll_constants::SER_VER;
248
+ write(os, serialVersion);
249
+ const uint8_t familyId = hll_constants::FAMILY_ID;
250
+ write(os, familyId);
251
+ const uint8_t lgKByte = this->lgConfigK_;
252
+ write(os, lgKByte);
287
253
 
288
254
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
289
- uint8_t lgArrByte(0);
255
+ uint8_t lgArrByte = 0;
290
256
  if (auxHashMap != nullptr) {
291
257
  lgArrByte = auxHashMap->getLgAuxArrInts();
292
258
  }
293
- os.write((char*)&lgArrByte, sizeof(lgArrByte));
259
+ write(os, lgArrByte);
294
260
 
295
- const uint8_t flagsByte(this->makeFlagsByte(compact));
296
- os.write((char*)&flagsByte, sizeof(flagsByte));
297
- const uint8_t curMinByte((uint8_t) curMin);
298
- os.write((char*)&curMinByte, sizeof(curMinByte));
299
- const uint8_t modeByte(this->makeModeByte());
300
- os.write((char*)&modeByte, sizeof(modeByte));
261
+ const uint8_t flagsByte = this->makeFlagsByte(compact);
262
+ write(os, flagsByte);
263
+ write(os, curMin_);
264
+ const uint8_t modeByte = this->makeModeByte();
265
+ write(os, modeByte);
301
266
 
302
267
  // estimator data
303
- os.write((char*)&hipAccum, sizeof(hipAccum));
304
- os.write((char*)&kxq0, sizeof(kxq0));
305
- os.write((char*)&kxq1, sizeof(kxq1));
268
+ write(os, hipAccum_);
269
+ write(os, kxq0_);
270
+ write(os, kxq1_);
306
271
 
307
272
  // array data
308
- os.write((char*)&numAtCurMin, sizeof(numAtCurMin));
273
+ write(os, numAtCurMin_);
309
274
 
310
- const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
311
- os.write((char*)&auxCount, sizeof(auxCount));
312
- os.write((char*)hllByteArr, getHllByteArrBytes());
275
+ const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
276
+ write(os, auxCount);
277
+ write(os, hllByteArr_.data(), getHllByteArrBytes());
313
278
 
314
279
  // aux map if HLL_4
315
- if (this->tgtHllType == HLL_4) {
280
+ if (this->tgtHllType_ == HLL_4) {
316
281
  if (auxHashMap != nullptr) {
317
282
  if (compact) {
318
- for (uint32_t coupon: *auxHashMap) {
319
- os.write((char*)&coupon, sizeof(coupon));
283
+ for (const uint32_t coupon: *auxHashMap) {
284
+ write(os, coupon);
320
285
  }
321
286
  } else {
322
- os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
287
+ write(os, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
323
288
  }
324
289
  } else if (!compact) {
325
290
  // if updatable, we write even if currently unused so the binary can be wrapped
326
- int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
327
- std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
291
+ uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
292
+ std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, static_cast<char>(0));
328
293
  }
329
294
  }
330
295
  }
331
296
 
332
297
  template<typename A>
333
298
  double HllArray<A>::getEstimate() const {
334
- if (oooFlag) {
299
+ if (oooFlag_) {
335
300
  return getCompositeEstimate();
336
301
  }
337
302
  return getHipAccum();
@@ -353,50 +318,50 @@ double HllArray<A>::getEstimate() const {
353
318
  * the very small values <= k where curMin = 0 still apply.
354
319
  */
355
320
  template<typename A>
356
- double HllArray<A>::getLowerBound(const int numStdDev) const {
321
+ double HllArray<A>::getLowerBound(uint8_t numStdDev) const {
357
322
  HllUtil<A>::checkNumStdDev(numStdDev);
358
- const int configK = 1 << this->lgConfigK;
359
- const double numNonZeros = ((curMin == 0) ? (configK - numAtCurMin) : configK);
323
+ const uint32_t configK = 1 << this->lgConfigK_;
324
+ const double numNonZeros = ((curMin_ == 0) ? (configK - numAtCurMin_) : configK);
360
325
 
361
326
  double estimate;
362
327
  double rseFactor;
363
- if (oooFlag) {
328
+ if (oooFlag_) {
364
329
  estimate = getCompositeEstimate();
365
- rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
330
+ rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
366
331
  } else {
367
- estimate = hipAccum;
368
- rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
332
+ estimate = hipAccum_;
333
+ rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
369
334
  }
370
335
 
371
336
  double relErr;
372
- if (this->lgConfigK > 12) {
337
+ if (this->lgConfigK_ > 12) {
373
338
  relErr = (numStdDev * rseFactor) / sqrt(configK);
374
339
  } else {
375
- relErr = HllUtil<A>::getRelErr(false, oooFlag, this->lgConfigK, numStdDev);
340
+ relErr = HllUtil<A>::getRelErr(false, oooFlag_, this->lgConfigK_, numStdDev);
376
341
  }
377
342
  return fmax(estimate / (1.0 + relErr), numNonZeros);
378
343
  }
379
344
 
380
345
  template<typename A>
381
- double HllArray<A>::getUpperBound(const int numStdDev) const {
346
+ double HllArray<A>::getUpperBound(uint8_t numStdDev) const {
382
347
  HllUtil<A>::checkNumStdDev(numStdDev);
383
- const int configK = 1 << this->lgConfigK;
348
+ const uint32_t configK = 1 << this->lgConfigK_;
384
349
 
385
350
  double estimate;
386
351
  double rseFactor;
387
- if (oooFlag) {
352
+ if (oooFlag_) {
388
353
  estimate = getCompositeEstimate();
389
- rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
354
+ rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
390
355
  } else {
391
- estimate = hipAccum;
392
- rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
356
+ estimate = hipAccum_;
357
+ rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
393
358
  }
394
359
 
395
360
  double relErr;
396
- if (this->lgConfigK > 12) {
361
+ if (this->lgConfigK_ > 12) {
397
362
  relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
398
363
  } else {
399
- relErr = HllUtil<A>::getRelErr(true, oooFlag, this->lgConfigK, numStdDev);
364
+ relErr = HllUtil<A>::getRelErr(true, oooFlag_, this->lgConfigK_, numStdDev);
400
365
  }
401
366
  return estimate / (1.0 + relErr);
402
367
  }
@@ -410,21 +375,21 @@ double HllArray<A>::getUpperBound(const int numStdDev) const {
410
375
  // Original C: again-two-registers.c hhb_get_composite_estimate L1489
411
376
  template<typename A>
412
377
  double HllArray<A>::getCompositeEstimate() const {
413
- const double rawEst = getHllRawEstimate(this->lgConfigK, kxq0 + kxq1);
378
+ const double rawEst = getHllRawEstimate();
414
379
 
415
- const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK);
416
- const int xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
417
- const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK);
380
+ const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK_);
381
+ const uint32_t xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
382
+ const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK_);
418
383
 
419
384
  if (rawEst < xArr[0]) {
420
385
  return 0;
421
386
  }
422
387
 
423
- const int xArrLenM1 = xArrLen - 1;
388
+ const uint32_t xArrLenM1 = xArrLen - 1;
424
389
 
425
390
  if (rawEst > xArr[xArrLenM1]) {
426
- double finalY = yStride * xArrLenM1;
427
- double factor = finalY / xArr[xArrLenM1];
391
+ const double finalY = yStride * xArrLenM1;
392
+ const double factor = finalY / xArr[xArrLenM1];
428
393
  return rawEst * factor;
429
394
  }
430
395
 
@@ -433,10 +398,9 @@ double HllArray<A>::getCompositeEstimate() const {
433
398
  // We need to completely avoid the linear_counting estimator if it might have a crazy value.
434
399
  // Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
435
400
 
436
- if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
401
+ if (adjEst > (3 << this->lgConfigK_)) { return adjEst; }
437
402
 
438
- const double linEst =
439
- getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
403
+ const double linEst = getHllBitMapEstimate();
440
404
 
441
405
  // Bias is created when the value of an estimator is compared with a threshold to decide whether
442
406
  // to use that estimator or a different one.
@@ -448,70 +412,70 @@ double HllArray<A>::getCompositeEstimate() const {
448
412
  // The following constants comes from empirical measurements of the crossover point
449
413
  // between the average error of the linear estimator and the adjusted hll estimator
450
414
  double crossOver = 0.64;
451
- if (this->lgConfigK == 4) { crossOver = 0.718; }
452
- else if (this->lgConfigK == 5) { crossOver = 0.672; }
415
+ if (this->lgConfigK_ == 4) { crossOver = 0.718; }
416
+ else if (this->lgConfigK_ == 5) { crossOver = 0.672; }
453
417
 
454
- return (avgEst > (crossOver * (1 << this->lgConfigK))) ? adjEst : linEst;
418
+ return (avgEst > (crossOver * (1 << this->lgConfigK_))) ? adjEst : linEst;
455
419
  }
456
420
 
457
421
  template<typename A>
458
422
  double HllArray<A>::getKxQ0() const {
459
- return kxq0;
423
+ return kxq0_;
460
424
  }
461
425
 
462
426
  template<typename A>
463
427
  double HllArray<A>::getKxQ1() const {
464
- return kxq1;
428
+ return kxq1_;
465
429
  }
466
430
 
467
431
  template<typename A>
468
432
  double HllArray<A>::getHipAccum() const {
469
- return hipAccum;
433
+ return hipAccum_;
470
434
  }
471
435
 
472
436
  template<typename A>
473
- int HllArray<A>::getCurMin() const {
474
- return curMin;
437
+ uint8_t HllArray<A>::getCurMin() const {
438
+ return curMin_;
475
439
  }
476
440
 
477
441
  template<typename A>
478
- int HllArray<A>::getNumAtCurMin() const {
479
- return numAtCurMin;
442
+ uint32_t HllArray<A>::getNumAtCurMin() const {
443
+ return numAtCurMin_;
480
444
  }
481
445
 
482
446
  template<typename A>
483
- void HllArray<A>::putKxQ0(const double kxq0) {
484
- this->kxq0 = kxq0;
447
+ void HllArray<A>::putKxQ0(double kxq0) {
448
+ kxq0_ = kxq0;
485
449
  }
486
450
 
487
451
  template<typename A>
488
- void HllArray<A>::putKxQ1(const double kxq1) {
489
- this->kxq1 = kxq1;
452
+ void HllArray<A>::putKxQ1(double kxq1) {
453
+ kxq1_ = kxq1;
490
454
  }
491
455
 
492
456
  template<typename A>
493
- void HllArray<A>::putHipAccum(const double hipAccum) {
494
- this->hipAccum = hipAccum;
457
+ void HllArray<A>::putHipAccum(double hipAccum) {
458
+ hipAccum_ = hipAccum;
495
459
  }
496
460
 
497
461
  template<typename A>
498
- void HllArray<A>::putCurMin(const int curMin) {
499
- this->curMin = curMin;
462
+ void HllArray<A>::putCurMin(uint8_t curMin) {
463
+ curMin_ = curMin;
500
464
  }
501
465
 
502
466
  template<typename A>
503
- void HllArray<A>::putNumAtCurMin(const int numAtCurMin) {
504
- this->numAtCurMin = numAtCurMin;
467
+ void HllArray<A>::putNumAtCurMin(uint32_t numAtCurMin) {
468
+ numAtCurMin_ = numAtCurMin;
505
469
  }
506
470
 
507
471
  template<typename A>
508
472
  void HllArray<A>::decNumAtCurMin() {
509
- --numAtCurMin;
473
+ --numAtCurMin_;
510
474
  }
511
475
 
512
476
  template<typename A>
513
- void HllArray<A>::addToHipAccum(const double delta) {
514
- hipAccum += delta;
477
+ void HllArray<A>::addToHipAccum(double delta) {
478
+ hipAccum_ += delta;
515
479
  }
516
480
 
517
481
  template<typename A>
@@ -521,22 +485,22 @@ bool HllArray<A>::isCompact() const {
521
485
 
522
486
  template<typename A>
523
487
  bool HllArray<A>::isEmpty() const {
524
- const int configK = 1 << this->lgConfigK;
488
+ const uint32_t configK = 1 << this->lgConfigK_;
525
489
  return (getCurMin() == 0) && (getNumAtCurMin() == configK);
526
490
  }
527
491
 
528
492
  template<typename A>
529
493
  void HllArray<A>::putOutOfOrderFlag(bool flag) {
530
- oooFlag = flag;
494
+ oooFlag_ = flag;
531
495
  }
532
496
 
533
497
  template<typename A>
534
498
  bool HllArray<A>::isOutOfOrderFlag() const {
535
- return oooFlag;
499
+ return oooFlag_;
536
500
  }
537
501
 
538
502
  template<typename A>
539
- int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
503
+ uint32_t HllArray<A>::hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK) {
540
504
  switch (tgtHllType) {
541
505
  case HLL_4:
542
506
  return hll4ArrBytes(lgConfigK);
@@ -550,41 +514,41 @@ int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
550
514
  }
551
515
 
552
516
  template<typename A>
553
- int HllArray<A>::hll4ArrBytes(const int lgConfigK) {
517
+ uint32_t HllArray<A>::hll4ArrBytes(uint8_t lgConfigK) {
554
518
  return 1 << (lgConfigK - 1);
555
519
  }
556
520
 
557
521
  template<typename A>
558
- int HllArray<A>::hll6ArrBytes(const int lgConfigK) {
559
- const int numSlots = 1 << lgConfigK;
522
+ uint32_t HllArray<A>::hll6ArrBytes(uint8_t lgConfigK) {
523
+ const uint32_t numSlots = 1 << lgConfigK;
560
524
  return ((numSlots * 3) >> 2) + 1;
561
525
  }
562
526
 
563
527
  template<typename A>
564
- int HllArray<A>::hll8ArrBytes(const int lgConfigK) {
528
+ uint32_t HllArray<A>::hll8ArrBytes(uint8_t lgConfigK) {
565
529
  return 1 << lgConfigK;
566
530
  }
567
531
 
568
532
  template<typename A>
569
- int HllArray<A>::getMemDataStart() const {
570
- return HllUtil<A>::HLL_BYTE_ARR_START;
533
+ uint32_t HllArray<A>::getMemDataStart() const {
534
+ return hll_constants::HLL_BYTE_ARR_START;
571
535
  }
572
536
 
573
537
  template<typename A>
574
- int HllArray<A>::getUpdatableSerializationBytes() const {
575
- return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes();
538
+ uint32_t HllArray<A>::getUpdatableSerializationBytes() const {
539
+ return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes();
576
540
  }
577
541
 
578
542
  template<typename A>
579
- int HllArray<A>::getCompactSerializationBytes() const {
543
+ uint32_t HllArray<A>::getCompactSerializationBytes() const {
580
544
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
581
- const int auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
582
- return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
545
+ const uint32_t auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
546
+ return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
583
547
  }
584
548
 
585
549
  template<typename A>
586
- int HllArray<A>::getPreInts() const {
587
- return HllUtil<A>::HLL_PREINTS;
550
+ uint8_t HllArray<A>::getPreInts() const {
551
+ return hll_constants::HLL_PREINTS;
588
552
  }
589
553
 
590
554
  template<typename A>
@@ -594,14 +558,14 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
594
558
 
595
559
  template<typename A>
596
560
  void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
597
- const int configK = 1 << this->getLgConfigK();
561
+ const uint32_t configK = 1 << this->getLgConfigK();
598
562
  // update hip BEFORE updating kxq
599
- if (!oooFlag) hipAccum += configK / (kxq0 + kxq1);
563
+ if (!oooFlag_) hipAccum_ += configK / (kxq0_ + kxq1_);
600
564
  // update kxq0 and kxq1; subtract first, then add
601
- if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
602
- else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
603
- if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
604
- else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
565
+ if (oldValue < 32) { kxq0_ -= INVERSE_POWERS_OF_2[oldValue]; }
566
+ else { kxq1_ -= INVERSE_POWERS_OF_2[oldValue]; }
567
+ if (newValue < 32) { kxq0_ += INVERSE_POWERS_OF_2[newValue]; }
568
+ else { kxq1_ += INVERSE_POWERS_OF_2[newValue]; }
605
569
  }
606
570
 
607
571
  /**
@@ -611,96 +575,101 @@ void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue)
611
575
  */
612
576
  //In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
613
577
  template<typename A>
614
- double HllArray<A>::getHllBitMapEstimate(const int lgConfigK, const int curMin, const int numAtCurMin) const {
615
- const int configK = 1 << lgConfigK;
616
- const int numUnhitBuckets = ((curMin == 0) ? numAtCurMin : 0);
578
+ double HllArray<A>::getHllBitMapEstimate() const {
579
+ const uint32_t configK = 1 << this->lgConfigK_;
580
+ const uint32_t numUnhitBuckets = curMin_ == 0 ? numAtCurMin_ : 0;
617
581
 
618
582
  //This will eventually go away.
619
583
  if (numUnhitBuckets == 0) {
620
584
  return configK * log(configK / 0.5);
621
585
  }
622
586
 
623
- const int numHitBuckets = configK - numUnhitBuckets;
587
+ const uint32_t numHitBuckets = configK - numUnhitBuckets;
624
588
  return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
625
589
  }
626
590
 
627
591
  //In C: again-two-registers.c hhb_get_raw_estimate L1167
628
592
  template<typename A>
629
- double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) const {
630
- const int configK = 1 << lgConfigK;
593
+ double HllArray<A>::getHllRawEstimate() const {
594
+ const uint32_t configK = 1 << this->lgConfigK_;
631
595
  double correctionFactor;
632
- if (lgConfigK == 4) { correctionFactor = 0.673; }
633
- else if (lgConfigK == 5) { correctionFactor = 0.697; }
634
- else if (lgConfigK == 6) { correctionFactor = 0.709; }
596
+ if (this->lgConfigK_ == 4) { correctionFactor = 0.673; }
597
+ else if (this->lgConfigK_ == 5) { correctionFactor = 0.697; }
598
+ else if (this->lgConfigK_ == 6) { correctionFactor = 0.709; }
635
599
  else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
636
- const double hyperEst = (correctionFactor * configK * configK) / kxqSum;
600
+ const double hyperEst = (correctionFactor * configK * configK) / (kxq0_ + kxq1_);
637
601
  return hyperEst;
638
602
  }
639
603
 
640
604
  template<typename A>
641
605
  typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
642
- return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
606
+ return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_, nullptr, 0, all);
643
607
  }
644
608
 
645
609
  template<typename A>
646
610
  typename HllArray<A>::const_iterator HllArray<A>::end() const {
647
- return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
611
+ return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_, this->tgtHllType_, nullptr, 0, false);
648
612
  }
649
613
 
650
614
  template<typename A>
651
- HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
652
- array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
615
+ HllArray<A>::const_iterator::const_iterator(const uint8_t* array, uint32_t array_size, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
616
+ array_(array), array_size_(array_size), index_(index), hll_type_(hll_type), exceptions_(exceptions), offset_(offset), all_(all)
653
617
  {
654
- while (this->index < array_size) {
655
- value = get_value(array, this->index, hll_type, exceptions, offset);
656
- if (all || value != HllUtil<A>::EMPTY) break;
657
- this->index++;
618
+ while (index_ < array_size_) {
619
+ value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
620
+ if (all_ || value_ != hll_constants::EMPTY) break;
621
+ ++index_;
658
622
  }
659
623
  }
660
624
 
661
625
  template<typename A>
662
626
  typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
663
- while (++index < array_size) {
664
- value = get_value(array, index, hll_type, exceptions, offset);
665
- if (all || value != HllUtil<A>::EMPTY) break;
627
+ while (++index_ < array_size_) {
628
+ value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
629
+ if (all_ || value_ != hll_constants::EMPTY) break;
666
630
  }
667
631
  return *this;
668
632
  }
669
633
 
670
634
  template<typename A>
671
635
  bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
672
- return index != other.index;
636
+ return index_ != other.index_;
673
637
  }
674
638
 
675
639
  template<typename A>
676
640
  uint32_t HllArray<A>::const_iterator::operator*() const {
677
- return HllUtil<A>::pair(index, value);
641
+ return HllUtil<A>::pair(index_, value_);
678
642
  }
679
643
 
680
644
  template<typename A>
681
- uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
645
+ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
682
646
  if (hll_type == target_hll_type::HLL_4) {
683
647
  uint8_t value = array[index >> 1];
684
648
  if ((index & 1) > 0) { // odd
685
649
  value >>= 4;
686
650
  } else {
687
- value &= HllUtil<A>::loNibbleMask;
651
+ value &= hll_constants::loNibbleMask;
688
652
  }
689
- if (value == HllUtil<A>::AUX_TOKEN) { // exception
653
+ if (value == hll_constants::AUX_TOKEN) { // exception
690
654
  return exceptions->mustFindValueFor(index);
691
655
  }
692
656
  return value + offset;
693
657
  } else if (hll_type == target_hll_type::HLL_6) {
694
- const int start_bit = index * 6;
695
- const int shift = start_bit & 0x7;
696
- const int byte_idx = start_bit >> 3;
658
+ const size_t start_bit = index * 6;
659
+ const uint8_t shift = start_bit & 0x7;
660
+ const size_t byte_idx = start_bit >> 3;
697
661
  const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
698
- return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
662
+ return (two_byte_val >> shift) & hll_constants::VAL_MASK_6;
699
663
  }
700
664
  // HLL_8
701
665
  return array[index];
702
666
  }
703
667
 
668
+ template<typename A>
669
+ A HllArray<A>::getAllocator() const {
670
+ return hllByteArr_.get_allocator();
671
+ }
672
+
704
673
  }
705
674
 
706
675
  #endif // _HLLARRAY_INTERNAL_HPP_