datasketches 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -31,19 +31,19 @@
31
31
  namespace datasketches {
32
32
 
33
33
  template<typename A>
34
- CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
34
+ CouponList<A>::CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator):
35
35
  HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
36
- couponCount(0),
37
- oooFlag(false),
38
- coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
36
+ couponCount_(0),
37
+ oooFlag_(false),
38
+ coupons_(1ULL << (mode == hll_mode::LIST ? hll_constants::LG_INIT_LIST_SIZE : hll_constants::LG_INIT_SET_SIZE), 0, allocator)
39
39
  {}
40
40
 
41
41
  template<typename A>
42
42
  CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
43
- HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
44
- couponCount(that.couponCount),
45
- oooFlag(that.oooFlag),
46
- coupons(that.coupons)
43
+ HllSketchImpl<A>(that.lgConfigK_, tgtHllType, that.mode_, false),
44
+ couponCount_(that.couponCount_),
45
+ oooFlag_(that.oooFlag_),
46
+ coupons_(that.coupons_)
47
47
  {}
48
48
 
49
49
  template<typename A>
@@ -58,48 +58,48 @@ std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
58
58
 
59
59
  template<typename A>
60
60
  CouponList<A>* CouponList<A>::copy() const {
61
- ClAlloc cla(coupons.get_allocator());
61
+ ClAlloc cla(coupons_.get_allocator());
62
62
  return new (cla.allocate(1)) CouponList<A>(*this);
63
63
  }
64
64
 
65
65
  template<typename A>
66
66
  CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
67
- ClAlloc cla(coupons.get_allocator());
67
+ ClAlloc cla(coupons_.get_allocator());
68
68
  return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
69
69
  }
70
70
 
71
71
  template<typename A>
72
72
  CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
73
- if (len < HllUtil<A>::LIST_INT_ARR_START) {
73
+ if (len < hll_constants::LIST_INT_ARR_START) {
74
74
  throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
75
75
  }
76
76
 
77
77
  const uint8_t* data = static_cast<const uint8_t*>(bytes);
78
- if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
78
+ if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
79
79
  throw std::invalid_argument("Incorrect number of preInts in input stream");
80
80
  }
81
- if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
81
+ if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
82
82
  throw std::invalid_argument("Wrong ser ver in input stream");
83
83
  }
84
- if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
84
+ if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
85
85
  throw std::invalid_argument("Input stream is not an HLL sketch");
86
86
  }
87
87
 
88
- hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
88
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
89
89
  if (mode != LIST) {
90
90
  throw std::invalid_argument("Calling list constructor with non-list mode data");
91
91
  }
92
92
 
93
- target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
93
+ target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
94
94
 
95
- const int lgK = data[HllUtil<A>::LG_K_BYTE];
96
- const bool compact = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
97
- const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
98
- const bool emptyFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
95
+ const uint8_t lgK = data[hll_constants::LG_K_BYTE];
96
+ const bool compact = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
97
+ const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
98
+ const bool emptyFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false);
99
99
 
100
- const int couponCount = data[HllUtil<A>::LIST_COUNT_BYTE];
101
- const int couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
102
- const size_t expectedLength = HllUtil<A>::LIST_INT_ARR_START + (couponsInArray * sizeof(int));
100
+ const uint32_t couponCount = data[hll_constants::LIST_COUNT_BYTE];
101
+ const uint32_t couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
102
+ const size_t expectedLength = hll_constants::LIST_INT_ARR_START + (couponsInArray * sizeof(uint32_t));
103
103
  if (len < expectedLength) {
104
104
  throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
105
105
  + ", found: " + std::to_string(len));
@@ -107,12 +107,12 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& al
107
107
 
108
108
  ClAlloc cla(allocator);
109
109
  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
110
- sketch->couponCount = couponCount;
110
+ sketch->couponCount_ = couponCount;
111
111
  sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
112
112
 
113
113
  if (!emptyFlag) {
114
114
  // only need to read valid coupons, unlike in stream case
115
- std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
115
+ std::memcpy(sketch->coupons_.data(), data + hll_constants::LIST_INT_ARR_START, couponCount * sizeof(uint32_t));
116
116
  }
117
117
 
118
118
  return sketch;
@@ -121,44 +121,44 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& al
121
121
  template<typename A>
122
122
  CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
123
123
  uint8_t listHeader[8];
124
- is.read((char*)listHeader, 8 * sizeof(uint8_t));
124
+ read(is, listHeader, 8 * sizeof(uint8_t));
125
125
 
126
- if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
126
+ if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
127
127
  throw std::invalid_argument("Incorrect number of preInts in input stream");
128
128
  }
129
- if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
129
+ if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
130
130
  throw std::invalid_argument("Wrong ser ver in input stream");
131
131
  }
132
- if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
132
+ if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
133
133
  throw std::invalid_argument("Input stream is not an HLL sketch");
134
134
  }
135
135
 
136
- hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
136
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
137
137
  if (mode != LIST) {
138
138
  throw std::invalid_argument("Calling list constructor with non-list mode data");
139
139
  }
140
140
 
141
- const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
141
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
142
142
 
143
- const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
144
- const bool compact = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
145
- const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
146
- const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
143
+ const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
144
+ const bool compact = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
145
+ const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
146
+ const bool emptyFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false);
147
147
 
148
148
  ClAlloc cla(allocator);
149
149
  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
150
150
  using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
151
151
  coupon_list_ptr ptr(sketch, sketch->get_deleter());
152
- const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
153
- sketch->couponCount = couponCount;
152
+ const uint32_t couponCount = listHeader[hll_constants::LIST_COUNT_BYTE];
153
+ sketch->couponCount_ = couponCount;
154
154
  sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
155
155
 
156
156
  if (!emptyFlag) {
157
157
  // For stream processing, need to read entire number written to stream so read
158
158
  // pointer ends up set correctly.
159
159
  // If not compact, still need to read empty items even though in order.
160
- const int numToRead = (compact ? couponCount : sketch->coupons.size());
161
- is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
160
+ const uint32_t numToRead = (compact ? couponCount : static_cast<uint32_t>(sketch->coupons_.size()));
161
+ read(is, sketch->coupons_.data(), numToRead * sizeof(uint32_t));
162
162
  }
163
163
 
164
164
  if (!is.good())
@@ -173,17 +173,17 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
173
173
  vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
174
174
  uint8_t* bytes = byteArr.data() + header_size_bytes;
175
175
 
176
- bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
177
- bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
178
- bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
179
- bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
180
- bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
181
- bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
182
- bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
183
- bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
184
-
185
- if (this->mode == SET) {
186
- std::memcpy(bytes + HllUtil<A>::HASH_SET_COUNT_INT, &couponCount, sizeof(couponCount));
176
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
177
+ bytes[hll_constants::SER_VER_BYTE] = static_cast<uint8_t>(hll_constants::SER_VER);
178
+ bytes[hll_constants::FAMILY_BYTE] = static_cast<uint8_t>(hll_constants::FAMILY_ID);
179
+ bytes[hll_constants::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK_);
180
+ bytes[hll_constants::LG_ARR_BYTE] = count_trailing_zeros_in_u32(static_cast<uint32_t>(coupons_.size()));
181
+ bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
182
+ bytes[hll_constants::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode_ == LIST ? couponCount_ : 0);
183
+ bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
184
+
185
+ if (this->mode_ == SET) {
186
+ std::memcpy(bytes + hll_constants::HASH_SET_COUNT_INT, &couponCount_, sizeof(couponCount_));
187
187
  }
188
188
 
189
189
  // coupons
@@ -191,12 +191,12 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
191
191
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
192
192
  switch (sw) {
193
193
  case 0: { // src updatable, dst updatable
194
- std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
194
+ std::memcpy(bytes + getMemDataStart(), coupons_.data(), coupons_.size() * sizeof(uint32_t));
195
195
  break;
196
196
  }
197
197
  case 1: { // src updatable, dst compact
198
198
  bytes += getMemDataStart(); // reusing pointer for incremental writes
199
- for (uint32_t coupon: *this) {
199
+ for (const uint32_t coupon: *this) {
200
200
  std::memcpy(bytes, &coupon, sizeof(coupon));
201
201
  bytes += sizeof(coupon);
202
202
  }
@@ -213,33 +213,33 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
213
213
  template<typename A>
214
214
  void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
215
215
  // header
216
- const uint8_t preInts(getPreInts());
217
- os.write((char*)&preInts, sizeof(preInts));
218
- const uint8_t serialVersion(HllUtil<A>::SER_VER);
219
- os.write((char*)&serialVersion, sizeof(serialVersion));
220
- const uint8_t familyId(HllUtil<A>::FAMILY_ID);
221
- os.write((char*)&familyId, sizeof(familyId));
222
- const uint8_t lgKByte((uint8_t) this->lgConfigK);
223
- os.write((char*)&lgKByte, sizeof(lgKByte));
224
- const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
225
- os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
226
- const uint8_t flagsByte(this->makeFlagsByte(compact));
227
- os.write((char*)&flagsByte, sizeof(flagsByte));
228
-
229
- if (this->mode == LIST) {
230
- const uint8_t listCount((uint8_t) couponCount);
231
- os.write((char*)&listCount, sizeof(listCount));
216
+ const uint8_t preInts = getPreInts();
217
+ write(os, preInts);
218
+ const uint8_t serialVersion(hll_constants::SER_VER);
219
+ write(os, serialVersion);
220
+ const uint8_t familyId(hll_constants::FAMILY_ID);
221
+ write(os, familyId);
222
+ const uint8_t lgKByte = this->lgConfigK_;
223
+ write(os, lgKByte);
224
+ const uint8_t lgArrIntsByte = count_trailing_zeros_in_u32(static_cast<uint32_t>(coupons_.size()));
225
+ write(os, lgArrIntsByte);
226
+ const uint8_t flagsByte = this->makeFlagsByte(compact);
227
+ write(os, flagsByte);
228
+
229
+ if (this->mode_ == LIST) {
230
+ const uint8_t listCount = static_cast<uint8_t>(couponCount_);
231
+ write(os, listCount);
232
232
  } else { // mode == SET
233
- const uint8_t unused(0);
234
- os.write((char*)&unused, sizeof(unused));
233
+ const uint8_t unused = 0;
234
+ write(os, unused);
235
235
  }
236
236
 
237
- const uint8_t modeByte(this->makeModeByte());
238
- os.write((char*)&modeByte, sizeof(modeByte));
237
+ const uint8_t modeByte = this->makeModeByte();
238
+ write(os, modeByte);
239
239
 
240
- if (this->mode == SET) {
240
+ if (this->mode_ == SET) {
241
241
  // writing as int, already stored as int
242
- os.write((char*)&couponCount, sizeof(couponCount));
242
+ write(os, couponCount_);
243
243
  }
244
244
 
245
245
  // coupons
@@ -247,12 +247,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
247
247
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
248
248
  switch (sw) {
249
249
  case 0: { // src updatable, dst updatable
250
- os.write((char*)coupons.data(), coupons.size() * sizeof(int));
250
+ write(os, coupons_.data(), coupons_.size() * sizeof(uint32_t));
251
251
  break;
252
252
  }
253
253
  case 1: { // src updatable, dst compact
254
- for (uint32_t coupon: *this) {
255
- os.write((char*)&coupon, sizeof(coupon));
254
+ for (const uint32_t coupon: *this) {
255
+ write(os, coupon);
256
256
  }
257
257
  break;
258
258
  }
@@ -265,14 +265,14 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
265
265
  }
266
266
 
267
267
  template<typename A>
268
- HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
269
- for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
270
- const int couponAtIdx = coupons[i];
271
- if (couponAtIdx == HllUtil<A>::EMPTY) {
272
- coupons[i] = coupon; // the actual update
273
- ++couponCount;
274
- if (couponCount == static_cast<int>(coupons.size())) { // array full
275
- if (this->lgConfigK < 8) {
268
+ HllSketchImpl<A>* CouponList<A>::couponUpdate(uint32_t coupon) {
269
+ for (size_t i = 0; i < coupons_.size(); ++i) { // search for empty slot
270
+ const uint32_t couponAtIdx = coupons_[i];
271
+ if (couponAtIdx == hll_constants::EMPTY) {
272
+ coupons_[i] = coupon; // the actual update
273
+ ++couponCount_;
274
+ if (couponCount_ == static_cast<uint32_t>(coupons_.size())) { // array full
275
+ if (this->lgConfigK_ < 8) {
276
276
  return promoteHeapListOrSetToHll(*this);
277
277
  }
278
278
  return promoteHeapListToSet(*this);
@@ -293,71 +293,68 @@ double CouponList<A>::getCompositeEstimate() const { return getEstimate(); }
293
293
 
294
294
  template<typename A>
295
295
  double CouponList<A>::getEstimate() const {
296
- const int couponCount = getCouponCount();
297
- const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
298
- return fmax(est, couponCount);
296
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
297
+ return fmax(est, couponCount_);
299
298
  }
300
299
 
301
300
  template<typename A>
302
- double CouponList<A>::getLowerBound(const int numStdDev) const {
301
+ double CouponList<A>::getLowerBound(uint8_t numStdDev) const {
303
302
  HllUtil<A>::checkNumStdDev(numStdDev);
304
- const int couponCount = getCouponCount();
305
- const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
306
- const double tmp = est / (1.0 + (numStdDev * HllUtil<A>::COUPON_RSE));
307
- return fmax(tmp, couponCount);
303
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
304
+ const double tmp = est / (1.0 + (numStdDev * hll_constants::COUPON_RSE));
305
+ return fmax(tmp, couponCount_);
308
306
  }
309
307
 
310
308
  template<typename A>
311
- double CouponList<A>::getUpperBound(const int numStdDev) const {
309
+ double CouponList<A>::getUpperBound(uint8_t numStdDev) const {
312
310
  HllUtil<A>::checkNumStdDev(numStdDev);
313
- const int couponCount = getCouponCount();
314
- const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
315
- const double tmp = est / (1.0 - (numStdDev * HllUtil<A>::COUPON_RSE));
316
- return fmax(tmp, couponCount);
311
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
312
+ const double tmp = est / (1.0 - (numStdDev * hll_constants::COUPON_RSE));
313
+ return fmax(tmp, couponCount_);
317
314
  }
318
315
 
319
316
  template<typename A>
320
317
  bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
321
318
 
322
319
  template<typename A>
323
- int CouponList<A>::getUpdatableSerializationBytes() const {
324
- return getMemDataStart() + coupons.size() * sizeof(int);
320
+ uint32_t CouponList<A>::getUpdatableSerializationBytes() const {
321
+ return getMemDataStart() + static_cast<uint32_t>(coupons_.size()) * sizeof(uint32_t);
325
322
  }
326
323
 
327
324
  template<typename A>
328
- int CouponList<A>::getCouponCount() const {
329
- return couponCount;
325
+ uint32_t CouponList<A>::getCouponCount() const {
326
+ return couponCount_;
330
327
  }
331
328
 
332
329
  template<typename A>
333
- int CouponList<A>::getCompactSerializationBytes() const {
334
- return getMemDataStart() + (couponCount << 2);
330
+ uint32_t CouponList<A>::getCompactSerializationBytes() const {
331
+ return getMemDataStart() + (couponCount_ << 2);
335
332
  }
336
333
 
337
334
  template<typename A>
338
- int CouponList<A>::getMemDataStart() const {
339
- return HllUtil<A>::LIST_INT_ARR_START;
335
+ uint32_t CouponList<A>::getMemDataStart() const {
336
+ return hll_constants::LIST_INT_ARR_START;
340
337
  }
341
338
 
342
339
  template<typename A>
343
- int CouponList<A>::getPreInts() const {
344
- return HllUtil<A>::LIST_PREINTS;
340
+ uint8_t CouponList<A>::getPreInts() const {
341
+ return hll_constants::LIST_PREINTS;
345
342
  }
346
343
 
347
344
  template<typename A>
348
345
  bool CouponList<A>::isCompact() const { return false; }
349
346
 
350
347
  template<typename A>
351
- bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag; }
348
+ bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag_; }
352
349
 
353
350
  template<typename A>
354
351
  void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
355
- this->oooFlag = oooFlag;
352
+ oooFlag_ = oooFlag;
356
353
  }
357
354
 
358
355
  template<typename A>
359
356
  A CouponList<A>::getAllocator() const {
360
- return coupons.get_allocator();
357
+ return coupons_.get_allocator();
361
358
  }
362
359
 
363
360
  template<typename A>
@@ -372,12 +369,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
372
369
 
373
370
  template<typename A>
374
371
  coupon_iterator<A> CouponList<A>::begin(bool all) const {
375
- return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
372
+ return coupon_iterator<A>(coupons_.data(), coupons_.size(), 0, all);
376
373
  }
377
374
 
378
375
  template<typename A>
379
376
  coupon_iterator<A> CouponList<A>::end() const {
380
- return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
377
+ return coupon_iterator<A>(coupons_.data(), coupons_.size(), coupons_.size(), false);
381
378
  }
382
379
 
383
380
  }
@@ -33,7 +33,7 @@ class HllSketchImplFactory;
33
33
  template<typename A>
34
34
  class CouponList : public HllSketchImpl<A> {
35
35
  public:
36
- CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
36
+ CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
37
37
  CouponList(const CouponList& that, target_hll_type tgtHllType);
38
38
 
39
39
  static CouponList* newList(const void* bytes, size_t len, const A& allocator);
@@ -47,15 +47,15 @@ class CouponList : public HllSketchImpl<A> {
47
47
  virtual CouponList* copy() const;
48
48
  virtual CouponList* copyAs(target_hll_type tgtHllType) const;
49
49
 
50
- virtual HllSketchImpl<A>* couponUpdate(int coupon);
50
+ virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon);
51
51
 
52
52
  virtual double getEstimate() const;
53
53
  virtual double getCompositeEstimate() const;
54
- virtual double getUpperBound(int numStdDev) const;
55
- virtual double getLowerBound(int numStdDev) const;
54
+ virtual double getUpperBound(uint8_t numStdDev) const;
55
+ virtual double getLowerBound(uint8_t numStdDev) const;
56
56
 
57
57
  virtual bool isEmpty() const;
58
- virtual int getCouponCount() const;
58
+ virtual uint32_t getCouponCount() const;
59
59
 
60
60
  coupon_iterator<A> begin(bool all = false) const;
61
61
  coupon_iterator<A> end() const;
@@ -63,24 +63,24 @@ class CouponList : public HllSketchImpl<A> {
63
63
  protected:
64
64
  using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
65
65
 
66
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
67
 
68
68
  HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
69
69
  HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
70
70
 
71
- virtual int getUpdatableSerializationBytes() const;
72
- virtual int getCompactSerializationBytes() const;
73
- virtual int getMemDataStart() const;
74
- virtual int getPreInts() const;
71
+ virtual uint32_t getUpdatableSerializationBytes() const;
72
+ virtual uint32_t getCompactSerializationBytes() const;
73
+ virtual uint32_t getMemDataStart() const;
74
+ virtual uint8_t getPreInts() const;
75
75
  virtual bool isCompact() const;
76
76
  virtual bool isOutOfOrderFlag() const;
77
77
  virtual void putOutOfOrderFlag(bool oooFlag);
78
78
 
79
79
  virtual A getAllocator() const;
80
80
 
81
- int couponCount;
82
- bool oooFlag;
83
- vector_int coupons;
81
+ uint32_t couponCount_;
82
+ bool oooFlag_;
83
+ vector_int coupons_;
84
84
 
85
85
  friend class HllSketchImplFactory<A>;
86
86
  };
@@ -102,10 +102,8 @@ double CubicInterpolation<A>::usingXAndYTables(const double xArr[], const double
102
102
  else if (offset == numEntries-2) { // corner case
103
103
  return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-2), x));
104
104
  }
105
- else { // main case
106
- return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-1), x));
107
- }
108
- throw std::logic_error("Exception should be unreachable");
105
+ // main case
106
+ return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-1), x));
109
107
  }
110
108
 
111
109
  // In C: again-two-registers cubic_interpolate_aux L1368
@@ -68,7 +68,7 @@ double HarmonicNumbers<A>::harmonicNumber(const uint64_t x_i) {
68
68
  if (x_i < NUM_EXACT_HARMONIC_NUMBERS) {
69
69
  return tableOfExactHarmonicNumbers[x_i];
70
70
  } else {
71
- double x = x_i;
71
+ double x = static_cast<double>(x_i);
72
72
  double invSq = 1.0 / (x * x);
73
73
  double sum = log(x) + EULER_MASCHERONI_CONSTANT + (1.0 / (2.0 * x));
74
74
  /* note: the number of terms included from this series expansion is appropriate