datasketches 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -35,19 +35,19 @@
35
35
  namespace datasketches {
36
36
 
37
37
  template<typename A>
38
- HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize, const A& allocator):
38
+ HllArray<A>::HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator):
39
39
  HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize),
40
- hipAccum(0.0),
41
- kxq0(1 << lgConfigK),
42
- kxq1(0.0),
43
- hllByteArr(allocator),
44
- curMin(0),
45
- numAtCurMin(1 << lgConfigK),
46
- oooFlag(false)
40
+ hipAccum_(0.0),
41
+ kxq0_(1 << lgConfigK),
42
+ kxq1_(0.0),
43
+ hllByteArr_(allocator),
44
+ curMin_(0),
45
+ numAtCurMin_(1 << lgConfigK),
46
+ oooFlag_(false)
47
47
  {}
48
48
 
49
49
  template<typename A>
50
- HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
50
+ HllArray<A>* HllArray<A>::copyAs(target_hll_type tgtHllType) const {
51
51
  if (tgtHllType == this->getTgtHllType()) {
52
52
  return static_cast<HllArray*>(copy());
53
53
  }
@@ -62,54 +62,54 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
62
62
 
63
63
  template<typename A>
64
64
  HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) {
65
- if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
65
+ if (len < hll_constants::HLL_BYTE_ARR_START) {
66
66
  throw std::out_of_range("Input data length insufficient to hold HLL array");
67
67
  }
68
68
 
69
69
  const uint8_t* data = static_cast<const uint8_t*>(bytes);
70
- if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
70
+ if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
71
71
  throw std::invalid_argument("Incorrect number of preInts in input stream");
72
72
  }
73
- if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
73
+ if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
74
74
  throw std::invalid_argument("Wrong ser ver in input stream");
75
75
  }
76
- if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
76
+ if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
77
77
  throw std::invalid_argument("Input array is not an HLL sketch");
78
78
  }
79
79
 
80
- const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
80
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
81
81
  if (mode != HLL) {
82
- throw std::invalid_argument("Calling HLL array construtor with non-HLL mode data");
82
+ throw std::invalid_argument("Calling HLL array constructor with non-HLL mode data");
83
83
  }
84
84
 
85
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
86
- const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
87
- const bool comapctFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
88
- const bool startFullSizeFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
85
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
86
+ const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
87
+ const bool comapctFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
88
+ const bool startFullSizeFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
89
89
 
90
- const int lgK = (int) data[HllUtil<A>::LG_K_BYTE];
91
- const int curMin = (int) data[HllUtil<A>::HLL_CUR_MIN_BYTE];
90
+ const uint8_t lgK = data[hll_constants::LG_K_BYTE];
91
+ const uint8_t curMin = data[hll_constants::HLL_CUR_MIN_BYTE];
92
92
 
93
- const int arrayBytes = hllArrBytes(tgtHllType, lgK);
94
- if (len < static_cast<size_t>(HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes)) {
93
+ const uint32_t arrayBytes = hllArrBytes(tgtHllType, lgK);
94
+ if (len < static_cast<size_t>(hll_constants::HLL_BYTE_ARR_START + arrayBytes)) {
95
95
  throw std::out_of_range("Input array too small to hold sketch image");
96
96
  }
97
97
 
98
98
  double hip, kxq0, kxq1;
99
- std::memcpy(&hip, data + HllUtil<A>::HIP_ACCUM_DOUBLE, sizeof(double));
100
- std::memcpy(&kxq0, data + HllUtil<A>::KXQ0_DOUBLE, sizeof(double));
101
- std::memcpy(&kxq1, data + HllUtil<A>::KXQ1_DOUBLE, sizeof(double));
99
+ std::memcpy(&hip, data + hll_constants::HIP_ACCUM_DOUBLE, sizeof(double));
100
+ std::memcpy(&kxq0, data + hll_constants::KXQ0_DOUBLE, sizeof(double));
101
+ std::memcpy(&kxq1, data + hll_constants::KXQ1_DOUBLE, sizeof(double));
102
102
 
103
- int numAtCurMin, auxCount;
104
- std::memcpy(&numAtCurMin, data + HllUtil<A>::CUR_MIN_COUNT_INT, sizeof(int));
105
- std::memcpy(&auxCount, data + HllUtil<A>::AUX_COUNT_INT, sizeof(int));
103
+ uint32_t numAtCurMin, auxCount;
104
+ std::memcpy(&numAtCurMin, data + hll_constants::CUR_MIN_COUNT_INT, sizeof(int));
105
+ std::memcpy(&auxCount, data + hll_constants::AUX_COUNT_INT, sizeof(int));
106
106
 
107
107
  AuxHashMap<A>* auxHashMap = nullptr;
108
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
109
109
  aux_hash_map_ptr aux_ptr;
110
110
  if (auxCount > 0) { // necessarily TgtHllType == HLL_4
111
- int auxLgIntArrSize = (int) data[4];
112
- const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
111
+ uint8_t auxLgIntArrSize = data[4];
112
+ const size_t offset = hll_constants::HLL_BYTE_ARR_START + arrayBytes;
113
113
  const uint8_t* auxDataStart = data + offset;
114
114
  auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
115
115
  aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
@@ -123,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocat
123
123
  sketch->putKxQ1(kxq1);
124
124
  sketch->putNumAtCurMin(numAtCurMin);
125
125
 
126
- std::memcpy(sketch->hllByteArr.data(), data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
126
+ std::memcpy(sketch->hllByteArr_.data(), data + hll_constants::HLL_BYTE_ARR_START, arrayBytes);
127
127
 
128
128
  if (auxHashMap != nullptr)
129
129
  ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
@@ -135,30 +135,30 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocat
135
135
  template<typename A>
136
136
  HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
137
137
  uint8_t listHeader[8];
138
- is.read((char*)listHeader, 8 * sizeof(uint8_t));
138
+ read(is, listHeader, 8 * sizeof(uint8_t));
139
139
 
140
- if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
140
+ if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
141
141
  throw std::invalid_argument("Incorrect number of preInts in input stream");
142
142
  }
143
- if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
143
+ if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
144
144
  throw std::invalid_argument("Wrong ser ver in input stream");
145
145
  }
146
- if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
146
+ if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
147
147
  throw std::invalid_argument("Input stream is not an HLL sketch");
148
148
  }
149
149
 
150
- hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
150
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
151
151
  if (mode != HLL) {
152
152
  throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
153
153
  }
154
154
 
155
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
156
- const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
157
- const bool comapctFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
158
- const bool startFullSizeFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
155
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
156
+ const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
157
+ const bool comapctFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
158
+ const bool startFullSizeFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
159
159
 
160
- const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
161
- const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
160
+ const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
161
+ const uint8_t curMin = listHeader[hll_constants::HLL_CUR_MIN_BYTE];
162
162
 
163
163
  HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
164
164
  typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
@@ -166,23 +166,21 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
166
166
  sketch->putCurMin(curMin);
167
167
  sketch->putOutOfOrderFlag(oooFlag);
168
168
 
169
- double hip, kxq0, kxq1;
170
- is.read((char*)&hip, sizeof(hip));
171
- is.read((char*)&kxq0, sizeof(kxq0));
172
- is.read((char*)&kxq1, sizeof(kxq1));
169
+ const auto hip = read<double>(is);
170
+ const auto kxq0 = read<double>(is);
171
+ const auto kxq1 = read<double>(is);
173
172
  if (!oooFlag) sketch->putHipAccum(hip);
174
173
  sketch->putKxQ0(kxq0);
175
174
  sketch->putKxQ1(kxq1);
176
175
 
177
- int numAtCurMin, auxCount;
178
- is.read((char*)&numAtCurMin, sizeof(numAtCurMin));
179
- is.read((char*)&auxCount, sizeof(auxCount));
176
+ const auto numAtCurMin = read<uint32_t>(is);
177
+ const auto auxCount = read<uint32_t>(is);
180
178
  sketch->putNumAtCurMin(numAtCurMin);
181
179
 
182
- is.read((char*)sketch->hllByteArr.data(), sketch->getHllByteArrBytes());
180
+ read(is, sketch->hllByteArr_.data(), sketch->getHllByteArrBytes());
183
181
 
184
182
  if (auxCount > 0) { // necessarily TgtHllType == HLL_4
185
- int auxLgIntArrSize = listHeader[4];
183
+ uint8_t auxLgIntArrSize = listHeader[4];
186
184
  AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
187
185
  ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
188
186
  }
@@ -200,31 +198,31 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
200
198
  uint8_t* bytes = byteArr.data() + header_size_bytes;
201
199
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
202
200
 
203
- bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
204
- bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
205
- bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
206
- bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
207
- bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
208
- bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
209
- bytes[HllUtil<A>::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin);
210
- bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
211
-
212
- std::memcpy(bytes + HllUtil<A>::HIP_ACCUM_DOUBLE, &hipAccum, sizeof(double));
213
- std::memcpy(bytes + HllUtil<A>::KXQ0_DOUBLE, &kxq0, sizeof(double));
214
- std::memcpy(bytes + HllUtil<A>::KXQ1_DOUBLE, &kxq1, sizeof(double));
215
- std::memcpy(bytes + HllUtil<A>::CUR_MIN_COUNT_INT, &numAtCurMin, sizeof(int));
216
- const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
217
- std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
218
-
219
- const int hllByteArrBytes = getHllByteArrBytes();
220
- std::memcpy(bytes + getMemDataStart(), hllByteArr.data(), hllByteArrBytes);
201
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
202
+ bytes[hll_constants::SER_VER_BYTE] = static_cast<uint8_t>(hll_constants::SER_VER);
203
+ bytes[hll_constants::FAMILY_BYTE] = static_cast<uint8_t>(hll_constants::FAMILY_ID);
204
+ bytes[hll_constants::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK_);
205
+ bytes[hll_constants::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
206
+ bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
207
+ bytes[hll_constants::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin_);
208
+ bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
209
+
210
+ std::memcpy(bytes + hll_constants::HIP_ACCUM_DOUBLE, &hipAccum_, sizeof(double));
211
+ std::memcpy(bytes + hll_constants::KXQ0_DOUBLE, &kxq0_, sizeof(double));
212
+ std::memcpy(bytes + hll_constants::KXQ1_DOUBLE, &kxq1_, sizeof(double));
213
+ std::memcpy(bytes + hll_constants::CUR_MIN_COUNT_INT, &numAtCurMin_, sizeof(uint32_t));
214
+ const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
215
+ std::memcpy(bytes + hll_constants::AUX_COUNT_INT, &auxCount, sizeof(uint32_t));
216
+
217
+ const uint32_t hllByteArrBytes = getHllByteArrBytes();
218
+ std::memcpy(bytes + getMemDataStart(), hllByteArr_.data(), hllByteArrBytes);
221
219
 
222
220
  // aux map if HLL_4
223
- if (this->tgtHllType == HLL_4) {
221
+ if (this->tgtHllType_ == HLL_4) {
224
222
  bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
225
223
  if (auxHashMap != nullptr) {
226
224
  if (compact) {
227
- for (uint32_t coupon: *auxHashMap) {
225
+ for (const uint32_t coupon: *auxHashMap) {
228
226
  std::memcpy(bytes, &coupon, sizeof(coupon));
229
227
  bytes += sizeof(coupon);
230
228
  }
@@ -233,8 +231,8 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
233
231
  }
234
232
  } else if (!compact) {
235
233
  // if updatable, we write even if currently unused so the binary can be wrapped
236
- int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
237
- std::fill_n(bytes, auxBytes, 0);
234
+ uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
235
+ std::fill_n(bytes, auxBytes, static_cast<uint8_t>(0));
238
236
  }
239
237
  }
240
238
 
@@ -242,64 +240,63 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
242
240
  }
243
241
 
244
242
  template<typename A>
245
- void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
243
+ void HllArray<A>::serialize(std::ostream& os, bool compact) const {
246
244
  // header
247
- const uint8_t preInts(getPreInts());
248
- os.write((char*)&preInts, sizeof(preInts));
249
- const uint8_t serialVersion(HllUtil<A>::SER_VER);
250
- os.write((char*)&serialVersion, sizeof(serialVersion));
251
- const uint8_t familyId(HllUtil<A>::FAMILY_ID);
252
- os.write((char*)&familyId, sizeof(familyId));
253
- const uint8_t lgKByte((uint8_t) this->lgConfigK);
254
- os.write((char*)&lgKByte, sizeof(lgKByte));
245
+ const uint8_t preInts = getPreInts();
246
+ write(os, preInts);
247
+ const uint8_t serialVersion = hll_constants::SER_VER;
248
+ write(os, serialVersion);
249
+ const uint8_t familyId = hll_constants::FAMILY_ID;
250
+ write(os, familyId);
251
+ const uint8_t lgKByte = this->lgConfigK_;
252
+ write(os, lgKByte);
255
253
 
256
254
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
257
- uint8_t lgArrByte(0);
255
+ uint8_t lgArrByte = 0;
258
256
  if (auxHashMap != nullptr) {
259
257
  lgArrByte = auxHashMap->getLgAuxArrInts();
260
258
  }
261
- os.write((char*)&lgArrByte, sizeof(lgArrByte));
259
+ write(os, lgArrByte);
262
260
 
263
- const uint8_t flagsByte(this->makeFlagsByte(compact));
264
- os.write((char*)&flagsByte, sizeof(flagsByte));
265
- const uint8_t curMinByte((uint8_t) curMin);
266
- os.write((char*)&curMinByte, sizeof(curMinByte));
267
- const uint8_t modeByte(this->makeModeByte());
268
- os.write((char*)&modeByte, sizeof(modeByte));
261
+ const uint8_t flagsByte = this->makeFlagsByte(compact);
262
+ write(os, flagsByte);
263
+ write(os, curMin_);
264
+ const uint8_t modeByte = this->makeModeByte();
265
+ write(os, modeByte);
269
266
 
270
267
  // estimator data
271
- os.write((char*)&hipAccum, sizeof(hipAccum));
272
- os.write((char*)&kxq0, sizeof(kxq0));
273
- os.write((char*)&kxq1, sizeof(kxq1));
268
+ write(os, hipAccum_);
269
+ write(os, kxq0_);
270
+ write(os, kxq1_);
274
271
 
275
272
  // array data
276
- os.write((char*)&numAtCurMin, sizeof(numAtCurMin));
273
+ write(os, numAtCurMin_);
277
274
 
278
- const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
279
- os.write((char*)&auxCount, sizeof(auxCount));
280
- os.write((char*)hllByteArr.data(), getHllByteArrBytes());
275
+ const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
276
+ write(os, auxCount);
277
+ write(os, hllByteArr_.data(), getHllByteArrBytes());
281
278
 
282
279
  // aux map if HLL_4
283
- if (this->tgtHllType == HLL_4) {
280
+ if (this->tgtHllType_ == HLL_4) {
284
281
  if (auxHashMap != nullptr) {
285
282
  if (compact) {
286
- for (uint32_t coupon: *auxHashMap) {
287
- os.write((char*)&coupon, sizeof(coupon));
283
+ for (const uint32_t coupon: *auxHashMap) {
284
+ write(os, coupon);
288
285
  }
289
286
  } else {
290
- os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
287
+ write(os, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
291
288
  }
292
289
  } else if (!compact) {
293
290
  // if updatable, we write even if currently unused so the binary can be wrapped
294
- int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
295
- std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
291
+ uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
292
+ std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, static_cast<char>(0));
296
293
  }
297
294
  }
298
295
  }
299
296
 
300
297
  template<typename A>
301
298
  double HllArray<A>::getEstimate() const {
302
- if (oooFlag) {
299
+ if (oooFlag_) {
303
300
  return getCompositeEstimate();
304
301
  }
305
302
  return getHipAccum();
@@ -321,50 +318,50 @@ double HllArray<A>::getEstimate() const {
321
318
  * the very small values <= k where curMin = 0 still apply.
322
319
  */
323
320
  template<typename A>
324
- double HllArray<A>::getLowerBound(const int numStdDev) const {
321
+ double HllArray<A>::getLowerBound(uint8_t numStdDev) const {
325
322
  HllUtil<A>::checkNumStdDev(numStdDev);
326
- const int configK = 1 << this->lgConfigK;
327
- const double numNonZeros = ((curMin == 0) ? (configK - numAtCurMin) : configK);
323
+ const uint32_t configK = 1 << this->lgConfigK_;
324
+ const double numNonZeros = ((curMin_ == 0) ? (configK - numAtCurMin_) : configK);
328
325
 
329
326
  double estimate;
330
327
  double rseFactor;
331
- if (oooFlag) {
328
+ if (oooFlag_) {
332
329
  estimate = getCompositeEstimate();
333
- rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
330
+ rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
334
331
  } else {
335
- estimate = hipAccum;
336
- rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
332
+ estimate = hipAccum_;
333
+ rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
337
334
  }
338
335
 
339
336
  double relErr;
340
- if (this->lgConfigK > 12) {
337
+ if (this->lgConfigK_ > 12) {
341
338
  relErr = (numStdDev * rseFactor) / sqrt(configK);
342
339
  } else {
343
- relErr = HllUtil<A>::getRelErr(false, oooFlag, this->lgConfigK, numStdDev);
340
+ relErr = HllUtil<A>::getRelErr(false, oooFlag_, this->lgConfigK_, numStdDev);
344
341
  }
345
342
  return fmax(estimate / (1.0 + relErr), numNonZeros);
346
343
  }
347
344
 
348
345
  template<typename A>
349
- double HllArray<A>::getUpperBound(const int numStdDev) const {
346
+ double HllArray<A>::getUpperBound(uint8_t numStdDev) const {
350
347
  HllUtil<A>::checkNumStdDev(numStdDev);
351
- const int configK = 1 << this->lgConfigK;
348
+ const uint32_t configK = 1 << this->lgConfigK_;
352
349
 
353
350
  double estimate;
354
351
  double rseFactor;
355
- if (oooFlag) {
352
+ if (oooFlag_) {
356
353
  estimate = getCompositeEstimate();
357
- rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
354
+ rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
358
355
  } else {
359
- estimate = hipAccum;
360
- rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
356
+ estimate = hipAccum_;
357
+ rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
361
358
  }
362
359
 
363
360
  double relErr;
364
- if (this->lgConfigK > 12) {
361
+ if (this->lgConfigK_ > 12) {
365
362
  relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
366
363
  } else {
367
- relErr = HllUtil<A>::getRelErr(true, oooFlag, this->lgConfigK, numStdDev);
364
+ relErr = HllUtil<A>::getRelErr(true, oooFlag_, this->lgConfigK_, numStdDev);
368
365
  }
369
366
  return estimate / (1.0 + relErr);
370
367
  }
@@ -378,21 +375,21 @@ double HllArray<A>::getUpperBound(const int numStdDev) const {
378
375
  // Original C: again-two-registers.c hhb_get_composite_estimate L1489
379
376
  template<typename A>
380
377
  double HllArray<A>::getCompositeEstimate() const {
381
- const double rawEst = getHllRawEstimate(this->lgConfigK, kxq0 + kxq1);
378
+ const double rawEst = getHllRawEstimate();
382
379
 
383
- const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK);
384
- const int xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
385
- const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK);
380
+ const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK_);
381
+ const uint32_t xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
382
+ const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK_);
386
383
 
387
384
  if (rawEst < xArr[0]) {
388
385
  return 0;
389
386
  }
390
387
 
391
- const int xArrLenM1 = xArrLen - 1;
388
+ const uint32_t xArrLenM1 = xArrLen - 1;
392
389
 
393
390
  if (rawEst > xArr[xArrLenM1]) {
394
- double finalY = yStride * xArrLenM1;
395
- double factor = finalY / xArr[xArrLenM1];
391
+ const double finalY = yStride * xArrLenM1;
392
+ const double factor = finalY / xArr[xArrLenM1];
396
393
  return rawEst * factor;
397
394
  }
398
395
 
@@ -401,10 +398,9 @@ double HllArray<A>::getCompositeEstimate() const {
401
398
  // We need to completely avoid the linear_counting estimator if it might have a crazy value.
402
399
  // Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
403
400
 
404
- if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
401
+ if (adjEst > (3 << this->lgConfigK_)) { return adjEst; }
405
402
 
406
- const double linEst =
407
- getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
403
+ const double linEst = getHllBitMapEstimate();
408
404
 
409
405
  // Bias is created when the value of an estimator is compared with a threshold to decide whether
410
406
  // to use that estimator or a different one.
@@ -416,70 +412,70 @@ double HllArray<A>::getCompositeEstimate() const {
416
412
  // The following constants comes from empirical measurements of the crossover point
417
413
  // between the average error of the linear estimator and the adjusted hll estimator
418
414
  double crossOver = 0.64;
419
- if (this->lgConfigK == 4) { crossOver = 0.718; }
420
- else if (this->lgConfigK == 5) { crossOver = 0.672; }
415
+ if (this->lgConfigK_ == 4) { crossOver = 0.718; }
416
+ else if (this->lgConfigK_ == 5) { crossOver = 0.672; }
421
417
 
422
- return (avgEst > (crossOver * (1 << this->lgConfigK))) ? adjEst : linEst;
418
+ return (avgEst > (crossOver * (1 << this->lgConfigK_))) ? adjEst : linEst;
423
419
  }
424
420
 
425
421
  template<typename A>
426
422
  double HllArray<A>::getKxQ0() const {
427
- return kxq0;
423
+ return kxq0_;
428
424
  }
429
425
 
430
426
  template<typename A>
431
427
  double HllArray<A>::getKxQ1() const {
432
- return kxq1;
428
+ return kxq1_;
433
429
  }
434
430
 
435
431
  template<typename A>
436
432
  double HllArray<A>::getHipAccum() const {
437
- return hipAccum;
433
+ return hipAccum_;
438
434
  }
439
435
 
440
436
  template<typename A>
441
- int HllArray<A>::getCurMin() const {
442
- return curMin;
437
+ uint8_t HllArray<A>::getCurMin() const {
438
+ return curMin_;
443
439
  }
444
440
 
445
441
  template<typename A>
446
- int HllArray<A>::getNumAtCurMin() const {
447
- return numAtCurMin;
442
+ uint32_t HllArray<A>::getNumAtCurMin() const {
443
+ return numAtCurMin_;
448
444
  }
449
445
 
450
446
  template<typename A>
451
- void HllArray<A>::putKxQ0(const double kxq0) {
452
- this->kxq0 = kxq0;
447
+ void HllArray<A>::putKxQ0(double kxq0) {
448
+ kxq0_ = kxq0;
453
449
  }
454
450
 
455
451
  template<typename A>
456
- void HllArray<A>::putKxQ1(const double kxq1) {
457
- this->kxq1 = kxq1;
452
+ void HllArray<A>::putKxQ1(double kxq1) {
453
+ kxq1_ = kxq1;
458
454
  }
459
455
 
460
456
  template<typename A>
461
- void HllArray<A>::putHipAccum(const double hipAccum) {
462
- this->hipAccum = hipAccum;
457
+ void HllArray<A>::putHipAccum(double hipAccum) {
458
+ hipAccum_ = hipAccum;
463
459
  }
464
460
 
465
461
  template<typename A>
466
- void HllArray<A>::putCurMin(const int curMin) {
467
- this->curMin = curMin;
462
+ void HllArray<A>::putCurMin(uint8_t curMin) {
463
+ curMin_ = curMin;
468
464
  }
469
465
 
470
466
  template<typename A>
471
- void HllArray<A>::putNumAtCurMin(const int numAtCurMin) {
472
- this->numAtCurMin = numAtCurMin;
467
+ void HllArray<A>::putNumAtCurMin(uint32_t numAtCurMin) {
468
+ numAtCurMin_ = numAtCurMin;
473
469
  }
474
470
 
475
471
  template<typename A>
476
472
  void HllArray<A>::decNumAtCurMin() {
477
- --numAtCurMin;
473
+ --numAtCurMin_;
478
474
  }
479
475
 
480
476
  template<typename A>
481
- void HllArray<A>::addToHipAccum(const double delta) {
482
- hipAccum += delta;
477
+ void HllArray<A>::addToHipAccum(double delta) {
478
+ hipAccum_ += delta;
483
479
  }
484
480
 
485
481
  template<typename A>
@@ -489,22 +485,22 @@ bool HllArray<A>::isCompact() const {
489
485
 
490
486
  template<typename A>
491
487
  bool HllArray<A>::isEmpty() const {
492
- const int configK = 1 << this->lgConfigK;
488
+ const uint32_t configK = 1 << this->lgConfigK_;
493
489
  return (getCurMin() == 0) && (getNumAtCurMin() == configK);
494
490
  }
495
491
 
496
492
  template<typename A>
497
493
  void HllArray<A>::putOutOfOrderFlag(bool flag) {
498
- oooFlag = flag;
494
+ oooFlag_ = flag;
499
495
  }
500
496
 
501
497
  template<typename A>
502
498
  bool HllArray<A>::isOutOfOrderFlag() const {
503
- return oooFlag;
499
+ return oooFlag_;
504
500
  }
505
501
 
506
502
  template<typename A>
507
- int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
503
+ uint32_t HllArray<A>::hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK) {
508
504
  switch (tgtHllType) {
509
505
  case HLL_4:
510
506
  return hll4ArrBytes(lgConfigK);
@@ -518,41 +514,41 @@ int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
518
514
  }
519
515
 
520
516
  template<typename A>
521
- int HllArray<A>::hll4ArrBytes(const int lgConfigK) {
517
+ uint32_t HllArray<A>::hll4ArrBytes(uint8_t lgConfigK) {
522
518
  return 1 << (lgConfigK - 1);
523
519
  }
524
520
 
525
521
  template<typename A>
526
- int HllArray<A>::hll6ArrBytes(const int lgConfigK) {
527
- const int numSlots = 1 << lgConfigK;
522
+ uint32_t HllArray<A>::hll6ArrBytes(uint8_t lgConfigK) {
523
+ const uint32_t numSlots = 1 << lgConfigK;
528
524
  return ((numSlots * 3) >> 2) + 1;
529
525
  }
530
526
 
531
527
  template<typename A>
532
- int HllArray<A>::hll8ArrBytes(const int lgConfigK) {
528
+ uint32_t HllArray<A>::hll8ArrBytes(uint8_t lgConfigK) {
533
529
  return 1 << lgConfigK;
534
530
  }
535
531
 
536
532
  template<typename A>
537
- int HllArray<A>::getMemDataStart() const {
538
- return HllUtil<A>::HLL_BYTE_ARR_START;
533
+ uint32_t HllArray<A>::getMemDataStart() const {
534
+ return hll_constants::HLL_BYTE_ARR_START;
539
535
  }
540
536
 
541
537
  template<typename A>
542
- int HllArray<A>::getUpdatableSerializationBytes() const {
543
- return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes();
538
+ uint32_t HllArray<A>::getUpdatableSerializationBytes() const {
539
+ return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes();
544
540
  }
545
541
 
546
542
  template<typename A>
547
- int HllArray<A>::getCompactSerializationBytes() const {
543
+ uint32_t HllArray<A>::getCompactSerializationBytes() const {
548
544
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
549
- const int auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
550
- return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
545
+ const uint32_t auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
546
+ return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
551
547
  }
552
548
 
553
549
  template<typename A>
554
- int HllArray<A>::getPreInts() const {
555
- return HllUtil<A>::HLL_PREINTS;
550
+ uint8_t HllArray<A>::getPreInts() const {
551
+ return hll_constants::HLL_PREINTS;
556
552
  }
557
553
 
558
554
  template<typename A>
@@ -562,14 +558,14 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
562
558
 
563
559
  template<typename A>
564
560
  void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
565
- const int configK = 1 << this->getLgConfigK();
561
+ const uint32_t configK = 1 << this->getLgConfigK();
566
562
  // update hip BEFORE updating kxq
567
- if (!oooFlag) hipAccum += configK / (kxq0 + kxq1);
563
+ if (!oooFlag_) hipAccum_ += configK / (kxq0_ + kxq1_);
568
564
  // update kxq0 and kxq1; subtract first, then add
569
- if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
570
- else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
571
- if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
572
- else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
565
+ if (oldValue < 32) { kxq0_ -= INVERSE_POWERS_OF_2[oldValue]; }
566
+ else { kxq1_ -= INVERSE_POWERS_OF_2[oldValue]; }
567
+ if (newValue < 32) { kxq0_ += INVERSE_POWERS_OF_2[newValue]; }
568
+ else { kxq1_ += INVERSE_POWERS_OF_2[newValue]; }
573
569
  }
574
570
 
575
571
  /**
@@ -579,91 +575,91 @@ void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue)
579
575
  */
580
576
  //In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
581
577
  template<typename A>
582
- double HllArray<A>::getHllBitMapEstimate(const int lgConfigK, const int curMin, const int numAtCurMin) const {
583
- const int configK = 1 << lgConfigK;
584
- const int numUnhitBuckets = ((curMin == 0) ? numAtCurMin : 0);
578
+ double HllArray<A>::getHllBitMapEstimate() const {
579
+ const uint32_t configK = 1 << this->lgConfigK_;
580
+ const uint32_t numUnhitBuckets = curMin_ == 0 ? numAtCurMin_ : 0;
585
581
 
586
582
  //This will eventually go away.
587
583
  if (numUnhitBuckets == 0) {
588
584
  return configK * log(configK / 0.5);
589
585
  }
590
586
 
591
- const int numHitBuckets = configK - numUnhitBuckets;
587
+ const uint32_t numHitBuckets = configK - numUnhitBuckets;
592
588
  return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
593
589
  }
594
590
 
595
591
  //In C: again-two-registers.c hhb_get_raw_estimate L1167
596
592
  template<typename A>
597
- double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) const {
598
- const int configK = 1 << lgConfigK;
593
+ double HllArray<A>::getHllRawEstimate() const {
594
+ const uint32_t configK = 1 << this->lgConfigK_;
599
595
  double correctionFactor;
600
- if (lgConfigK == 4) { correctionFactor = 0.673; }
601
- else if (lgConfigK == 5) { correctionFactor = 0.697; }
602
- else if (lgConfigK == 6) { correctionFactor = 0.709; }
596
+ if (this->lgConfigK_ == 4) { correctionFactor = 0.673; }
597
+ else if (this->lgConfigK_ == 5) { correctionFactor = 0.697; }
598
+ else if (this->lgConfigK_ == 6) { correctionFactor = 0.709; }
603
599
  else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
604
- const double hyperEst = (correctionFactor * configK * configK) / kxqSum;
600
+ const double hyperEst = (correctionFactor * configK * configK) / (kxq0_ + kxq1_);
605
601
  return hyperEst;
606
602
  }
607
603
 
608
604
  template<typename A>
609
605
  typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
610
- return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
606
+ return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_, nullptr, 0, all);
611
607
  }
612
608
 
613
609
  template<typename A>
614
610
  typename HllArray<A>::const_iterator HllArray<A>::end() const {
615
- return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
611
+ return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_, this->tgtHllType_, nullptr, 0, false);
616
612
  }
617
613
 
618
614
  template<typename A>
619
- HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
620
- array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
615
+ HllArray<A>::const_iterator::const_iterator(const uint8_t* array, uint32_t array_size, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
616
+ array_(array), array_size_(array_size), index_(index), hll_type_(hll_type), exceptions_(exceptions), offset_(offset), all_(all)
621
617
  {
622
- while (this->index < array_size) {
623
- value = get_value(array, this->index, hll_type, exceptions, offset);
624
- if (all || value != HllUtil<A>::EMPTY) break;
625
- this->index++;
618
+ while (index_ < array_size_) {
619
+ value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
620
+ if (all_ || value_ != hll_constants::EMPTY) break;
621
+ ++index_;
626
622
  }
627
623
  }
628
624
 
629
625
  template<typename A>
630
626
  typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
631
- while (++index < array_size) {
632
- value = get_value(array, index, hll_type, exceptions, offset);
633
- if (all || value != HllUtil<A>::EMPTY) break;
627
+ while (++index_ < array_size_) {
628
+ value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
629
+ if (all_ || value_ != hll_constants::EMPTY) break;
634
630
  }
635
631
  return *this;
636
632
  }
637
633
 
638
634
  template<typename A>
639
635
  bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
640
- return index != other.index;
636
+ return index_ != other.index_;
641
637
  }
642
638
 
643
639
  template<typename A>
644
640
  uint32_t HllArray<A>::const_iterator::operator*() const {
645
- return HllUtil<A>::pair(index, value);
641
+ return HllUtil<A>::pair(index_, value_);
646
642
  }
647
643
 
648
644
  template<typename A>
649
- uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
645
+ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
650
646
  if (hll_type == target_hll_type::HLL_4) {
651
647
  uint8_t value = array[index >> 1];
652
648
  if ((index & 1) > 0) { // odd
653
649
  value >>= 4;
654
650
  } else {
655
- value &= HllUtil<A>::loNibbleMask;
651
+ value &= hll_constants::loNibbleMask;
656
652
  }
657
- if (value == HllUtil<A>::AUX_TOKEN) { // exception
653
+ if (value == hll_constants::AUX_TOKEN) { // exception
658
654
  return exceptions->mustFindValueFor(index);
659
655
  }
660
656
  return value + offset;
661
657
  } else if (hll_type == target_hll_type::HLL_6) {
662
- const int start_bit = index * 6;
663
- const int shift = start_bit & 0x7;
664
- const int byte_idx = start_bit >> 3;
658
+ const size_t start_bit = index * 6;
659
+ const uint8_t shift = start_bit & 0x7;
660
+ const size_t byte_idx = start_bit >> 3;
665
661
  const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
666
- return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
662
+ return (two_byte_val >> shift) & hll_constants::VAL_MASK_6;
667
663
  }
668
664
  // HLL_8
669
665
  return array[index];
@@ -671,7 +667,7 @@ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t inde
671
667
 
672
668
  template<typename A>
673
669
  A HllArray<A>::getAllocator() const {
674
- return hllByteArr.get_allocator();
670
+ return hllByteArr_.get_allocator();
675
671
  }
676
672
 
677
673
  }