datasketches 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -28,10 +28,10 @@
28
28
  namespace datasketches {
29
29
 
30
30
  template<typename A>
31
- static int find(const int* array, const int lgArrInts, const int coupon);
31
+ static int32_t find(const uint32_t* array, uint8_t lgArrInts, uint32_t coupon);
32
32
 
33
33
  template<typename A>
34
- CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator)
34
+ CouponHashSet<A>::CouponHashSet(uint8_t lgConfigK, target_hll_type tgtHllType, const A& allocator)
35
35
  : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
36
36
  {
37
37
  if (lgConfigK <= 7) {
@@ -56,45 +56,45 @@ std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
56
56
 
57
57
  template<typename A>
58
58
  CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) {
59
- if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded
59
+ if (len < hll_constants::HASH_SET_INT_ARR_START) { // hard-coded
60
60
  throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
61
61
  }
62
62
 
63
63
  const uint8_t* data = static_cast<const uint8_t*>(bytes);
64
- if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
64
+ if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HASH_SET_PREINTS) {
65
65
  throw std::invalid_argument("Incorrect number of preInts in input stream");
66
66
  }
67
- if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
67
+ if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
68
68
  throw std::invalid_argument("Wrong ser ver in input stream");
69
69
  }
70
- if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
70
+ if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
71
71
  throw std::invalid_argument("Input stream is not an HLL sketch");
72
72
  }
73
73
 
74
- const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
74
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
75
75
  if (mode != SET) {
76
76
  throw std::invalid_argument("Calling set constructor with non-set mode data");
77
77
  }
78
78
 
79
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
79
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
80
80
 
81
- const int lgK = data[HllUtil<A>::LG_K_BYTE];
81
+ const uint8_t lgK = data[hll_constants::LG_K_BYTE];
82
82
  if (lgK <= 7) {
83
83
  throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
84
84
  + std::to_string(lgK));
85
85
  }
86
- int lgArrInts = data[HllUtil<A>::LG_ARR_BYTE];
87
- const bool compactFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
86
+ uint8_t lgArrInts = data[hll_constants::LG_ARR_BYTE];
87
+ const bool compactFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
88
88
 
89
- int couponCount;
90
- std::memcpy(&couponCount, data + HllUtil<A>::HASH_SET_COUNT_INT, sizeof(couponCount));
91
- if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
92
- lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
89
+ uint32_t couponCount;
90
+ std::memcpy(&couponCount, data + hll_constants::HASH_SET_COUNT_INT, sizeof(couponCount));
91
+ if (lgArrInts < hll_constants::LG_INIT_SET_SIZE) {
92
+ lgArrInts = HllUtil<>::computeLgArrInts(SET, couponCount, lgK);
93
93
  }
94
94
  // Don't set couponCount in sketch here;
95
95
  // we'll set later if updatable, and increment with updates if compact
96
- const int couponsInArray = (compactFlag ? couponCount : (1 << lgArrInts));
97
- const size_t expectedLength = HllUtil<A>::HASH_SET_INT_ARR_START + (couponsInArray * sizeof(int));
96
+ const uint32_t couponsInArray = (compactFlag ? couponCount : (1 << lgArrInts));
97
+ const size_t expectedLength = hll_constants::HASH_SET_INT_ARR_START + (couponsInArray * sizeof(uint32_t));
98
98
  if (len < expectedLength) {
99
99
  throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
100
100
  + ", found: " + std::to_string(len));
@@ -104,19 +104,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const
104
104
  CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
105
105
 
106
106
  if (compactFlag) {
107
- const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
108
- int coupon;
109
- for (int i = 0; i < couponCount; ++i, curPos += sizeof(coupon)) {
107
+ const uint8_t* curPos = data + hll_constants::HASH_SET_INT_ARR_START;
108
+ uint32_t coupon;
109
+ for (uint32_t i = 0; i < couponCount; ++i, curPos += sizeof(coupon)) {
110
110
  std::memcpy(&coupon, curPos, sizeof(coupon));
111
111
  sketch->couponUpdate(coupon);
112
112
  }
113
113
  } else {
114
- sketch->coupons.resize(1 << lgArrInts);
115
- sketch->couponCount = couponCount;
114
+ sketch->coupons_.resize(1ULL << lgArrInts);
115
+ sketch->couponCount_ = couponCount;
116
116
  // only need to read valid coupons, unlike in stream case
117
- std::memcpy(sketch->coupons.data(),
118
- data + HllUtil<A>::HASH_SET_INT_ARR_START,
119
- couponCount * sizeof(int));
117
+ std::memcpy(sketch->coupons_.data(),
118
+ data + hll_constants::HASH_SET_INT_ARR_START,
119
+ couponCount * sizeof(uint32_t));
120
120
  }
121
121
 
122
122
  return sketch;
@@ -125,37 +125,36 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const
125
125
  template<typename A>
126
126
  CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) {
127
127
  uint8_t listHeader[8];
128
- is.read((char*)listHeader, 8 * sizeof(uint8_t));
128
+ read(is, listHeader, 8 * sizeof(uint8_t));
129
129
 
130
- if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
130
+ if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HASH_SET_PREINTS) {
131
131
  throw std::invalid_argument("Incorrect number of preInts in input stream");
132
132
  }
133
- if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
133
+ if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
134
134
  throw std::invalid_argument("Wrong ser ver in input stream");
135
135
  }
136
- if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
136
+ if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
137
137
  throw std::invalid_argument("Input stream is not an HLL sketch");
138
138
  }
139
139
 
140
- hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
140
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
141
141
  if (mode != SET) {
142
142
  throw std::invalid_argument("Calling set constructor with non-set mode data");
143
143
  }
144
144
 
145
- target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
145
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
146
146
 
147
- const int lgK = listHeader[HllUtil<A>::LG_K_BYTE];
147
+ const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
148
148
  if (lgK <= 7) {
149
149
  throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
150
150
  + std::to_string(lgK));
151
151
  }
152
- int lgArrInts = listHeader[HllUtil<A>::LG_ARR_BYTE];
153
- const bool compactFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
152
+ uint8_t lgArrInts = listHeader[hll_constants::LG_ARR_BYTE];
153
+ const bool compactFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
154
154
 
155
- int couponCount;
156
- is.read((char*)&couponCount, sizeof(couponCount));
157
- if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
158
- lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
155
+ const auto couponCount = read<uint32_t>(is);
156
+ if (lgArrInts < hll_constants::LG_INIT_SET_SIZE) {
157
+ lgArrInts = HllUtil<>::computeLgArrInts(SET, couponCount, lgK);
159
158
  }
160
159
 
161
160
  ChsAlloc chsa(allocator);
@@ -166,16 +165,15 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator)
166
165
  // Don't set couponCount here;
167
166
  // we'll set later if updatable, and increment with updates if compact
168
167
  if (compactFlag) {
169
- for (int i = 0; i < couponCount; ++i) {
170
- int coupon;
171
- is.read((char*)&coupon, sizeof(coupon));
168
+ for (uint32_t i = 0; i < couponCount; ++i) {
169
+ const auto coupon = read<uint32_t>(is);
172
170
  sketch->couponUpdate(coupon);
173
171
  }
174
172
  } else {
175
- sketch->coupons.resize(1 << lgArrInts);
176
- sketch->couponCount = couponCount;
173
+ sketch->coupons_.resize(1ULL << lgArrInts);
174
+ sketch->couponCount_ = couponCount;
177
175
  // for stream processing, read entire list so read pointer ends up set correctly
178
- is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int));
176
+ read(is, sketch->coupons_.data(), sketch->coupons_.size() * sizeof(uint32_t));
179
177
  }
180
178
 
181
179
  if (!is.good())
@@ -186,25 +184,25 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator)
186
184
 
187
185
  template<typename A>
188
186
  CouponHashSet<A>* CouponHashSet<A>::copy() const {
189
- ChsAlloc chsa(this->coupons.get_allocator());
187
+ ChsAlloc chsa(this->coupons_.get_allocator());
190
188
  return new (chsa.allocate(1)) CouponHashSet<A>(*this);
191
189
  }
192
190
 
193
191
  template<typename A>
194
- CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
195
- ChsAlloc chsa(this->coupons.get_allocator());
192
+ CouponHashSet<A>* CouponHashSet<A>::copyAs(target_hll_type tgtHllType) const {
193
+ ChsAlloc chsa(this->coupons_.get_allocator());
196
194
  return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType);
197
195
  }
198
196
 
199
197
  template<typename A>
200
- HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
201
- const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
202
- const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon);
198
+ HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(uint32_t coupon) {
199
+ const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(static_cast<uint32_t>(this->coupons_.size()));
200
+ const int32_t index = find<A>(this->coupons_.data(), lgCouponArrInts, coupon);
203
201
  if (index >= 0) {
204
202
  return this; // found duplicate, ignore
205
203
  }
206
- this->coupons[~index] = coupon; // found empty
207
- ++this->couponCount;
204
+ this->coupons_[~index] = coupon; // found empty
205
+ ++this->couponCount_;
208
206
  if (checkGrowOrPromote()) {
209
207
  return this->promoteHeapListOrSetToHll(*this);
210
208
  }
@@ -212,20 +210,20 @@ HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
212
210
  }
213
211
 
214
212
  template<typename A>
215
- int CouponHashSet<A>::getMemDataStart() const {
216
- return HllUtil<A>::HASH_SET_INT_ARR_START;
213
+ uint32_t CouponHashSet<A>::getMemDataStart() const {
214
+ return hll_constants::HASH_SET_INT_ARR_START;
217
215
  }
218
216
 
219
217
  template<typename A>
220
- int CouponHashSet<A>::getPreInts() const {
221
- return HllUtil<A>::HASH_SET_PREINTS;
218
+ uint8_t CouponHashSet<A>::getPreInts() const {
219
+ return hll_constants::HASH_SET_PREINTS;
222
220
  }
223
221
 
224
222
  template<typename A>
225
223
  bool CouponHashSet<A>::checkGrowOrPromote() {
226
- if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) {
227
- const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
228
- if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
224
+ if (static_cast<size_t>(hll_constants::RESIZE_DENOM * this->couponCount_) > (hll_constants::RESIZE_NUMER * this->coupons_.size())) {
225
+ const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(static_cast<uint32_t>(this->coupons_.size()));
226
+ if (lgCouponArrInts == (this->lgConfigK_ - 3)) { // at max size
229
227
  return true; // promote to HLL
230
228
  }
231
229
  growHashSet(lgCouponArrInts + 1);
@@ -234,15 +232,15 @@ bool CouponHashSet<A>::checkGrowOrPromote() {
234
232
  }
235
233
 
236
234
  template<typename A>
237
- void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
238
- const int tgtLen = 1 << tgtLgCoupArrSize;
239
- vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator());
240
-
241
- const int srcLen = this->coupons.size();
242
- for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
243
- const int fetched = this->coupons[i];
244
- if (fetched != HllUtil<A>::EMPTY) {
245
- const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
235
+ void CouponHashSet<A>::growHashSet(uint8_t tgtLgCoupArrSize) {
236
+ const uint32_t tgtLen = 1 << tgtLgCoupArrSize;
237
+ vector_int coupons_new(tgtLen, 0, this->coupons_.get_allocator());
238
+
239
+ const uint32_t srcLen = static_cast<uint32_t>(this->coupons_.size());
240
+ for (uint32_t i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
241
+ const uint32_t fetched = this->coupons_[i];
242
+ if (fetched != hll_constants::EMPTY) {
243
+ const int32_t idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
246
244
  if (idx < 0) { // found EMPTY
247
245
  coupons_new[~idx] = fetched; // insert
248
246
  continue;
@@ -250,23 +248,23 @@ void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
250
248
  throw std::runtime_error("Error: Found duplicate coupon");
251
249
  }
252
250
  }
253
- this->coupons = std::move(coupons_new);
251
+ this->coupons_ = std::move(coupons_new);
254
252
  }
255
253
 
256
254
  template<typename A>
257
- static int find(const int* array, const int lgArrInts, const int coupon) {
258
- const int arrMask = (1 << lgArrInts) - 1;
259
- int probe = coupon & arrMask;
260
- const int loopIndex = probe;
255
+ static int32_t find(const uint32_t* array, uint8_t lgArrInts, uint32_t coupon) {
256
+ const uint32_t arrMask = (1 << lgArrInts) - 1;
257
+ uint32_t probe = coupon & arrMask;
258
+ const uint32_t loopIndex = probe;
261
259
  do {
262
- const int couponAtIdx = array[probe];
263
- if (couponAtIdx == HllUtil<A>::EMPTY) {
260
+ const uint32_t couponAtIdx = array[probe];
261
+ if (couponAtIdx == hll_constants::EMPTY) {
264
262
  return ~probe; //empty
265
263
  }
266
264
  else if (coupon == couponAtIdx) {
267
265
  return probe; //duplicate
268
266
  }
269
- const int stride = ((coupon & HllUtil<A>::KEY_MASK_26) >> lgArrInts) | 1;
267
+ const uint32_t stride = ((coupon & hll_constants::KEY_MASK_26) >> lgArrInts) | 1;
270
268
  probe = (probe + stride) & arrMask;
271
269
  } while (probe != loopIndex);
272
270
  throw std::invalid_argument("Key not found and no empty slots!");
@@ -29,29 +29,29 @@ class CouponHashSet : public CouponList<A> {
29
29
  public:
30
30
  static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
31
31
  static CouponHashSet* newSet(std::istream& is, const A& allocator);
32
- CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
32
+ CouponHashSet(uint8_t lgConfigK, target_hll_type tgtHllType, const A& allocator);
33
33
  CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
34
34
 
35
35
  virtual ~CouponHashSet() = default;
36
36
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
37
37
 
38
38
  protected:
39
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
39
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
40
40
 
41
41
  virtual CouponHashSet* copy() const;
42
42
  virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
43
43
 
44
- virtual HllSketchImpl<A>* couponUpdate(int coupon);
44
+ virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon);
45
45
 
46
- virtual int getMemDataStart() const;
47
- virtual int getPreInts() const;
46
+ virtual uint32_t getMemDataStart() const;
47
+ virtual uint8_t getPreInts() const;
48
48
 
49
49
  friend class HllSketchImplFactory<A>;
50
50
 
51
51
  private:
52
52
  using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
53
53
  bool checkGrowOrPromote();
54
- void growHashSet(int tgtLgCoupArrSize);
54
+ void growHashSet(uint8_t tgtLgCoupArrSize);
55
55
  };
56
56
 
57
57
  }