datasketches 0.1.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -28,43 +28,42 @@ namespace datasketches {
28
28
  template<typename A>
29
29
  class AuxHashMap;
30
30
 
31
- template<typename A = std::allocator<char>>
31
+ template<typename A>
32
32
  class HllArray : public HllSketchImpl<A> {
33
33
  public:
34
- explicit HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize);
35
- explicit HllArray(const HllArray<A>& that);
34
+ HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
36
35
 
37
- static HllArray* newHll(const void* bytes, size_t len);
38
- static HllArray* newHll(std::istream& is);
36
+ static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
37
+ static HllArray* newHll(std::istream& is, const A& allocator);
39
38
 
40
39
  virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
41
40
  virtual void serialize(std::ostream& os, bool compact) const;
42
41
 
43
- virtual ~HllArray();
42
+ virtual ~HllArray() = default;
44
43
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
45
44
 
46
45
  virtual HllArray* copy() const = 0;
47
46
  virtual HllArray* copyAs(target_hll_type tgtHllType) const;
48
47
 
49
- virtual HllSketchImpl<A>* couponUpdate(int coupon) = 0;
48
+ virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) = 0;
50
49
 
51
50
  virtual double getEstimate() const;
52
51
  virtual double getCompositeEstimate() const;
53
- virtual double getLowerBound(int numStdDev) const;
54
- virtual double getUpperBound(int numStdDev) const;
52
+ virtual double getLowerBound(uint8_t numStdDev) const;
53
+ virtual double getUpperBound(uint8_t numStdDev) const;
55
54
 
56
55
  inline void addToHipAccum(double delta);
57
56
 
58
57
  inline void decNumAtCurMin();
59
58
 
60
- inline int getCurMin() const;
61
- inline int getNumAtCurMin() const;
59
+ inline uint8_t getCurMin() const;
60
+ inline uint32_t getNumAtCurMin() const;
62
61
  inline double getHipAccum() const;
63
62
 
64
- virtual int getHllByteArrBytes() const = 0;
63
+ virtual uint32_t getHllByteArrBytes() const = 0;
65
64
 
66
- virtual int getUpdatableSerializationBytes() const;
67
- virtual int getCompactSerializationBytes() const;
65
+ virtual uint32_t getUpdatableSerializationBytes() const;
66
+ virtual uint32_t getCompactSerializationBytes() const;
68
67
 
69
68
  virtual bool isOutOfOrderFlag() const;
70
69
  virtual bool isEmpty() const;
@@ -75,19 +74,19 @@ class HllArray : public HllSketchImpl<A> {
75
74
  inline double getKxQ0() const;
76
75
  inline double getKxQ1() const;
77
76
 
78
- virtual int getMemDataStart() const;
79
- virtual int getPreInts() const;
77
+ virtual uint32_t getMemDataStart() const;
78
+ virtual uint8_t getPreInts() const;
80
79
 
81
- void putCurMin(int curMin);
80
+ void putCurMin(uint8_t curMin);
82
81
  void putHipAccum(double hipAccum);
83
82
  inline void putKxQ0(double kxq0);
84
83
  inline void putKxQ1(double kxq1);
85
- void putNumAtCurMin(int numAtCurMin);
84
+ void putNumAtCurMin(uint32_t numAtCurMin);
86
85
 
87
- static int hllArrBytes(target_hll_type tgtHllType, int lgConfigK);
88
- static int hll4ArrBytes(int lgConfigK);
89
- static int hll6ArrBytes(int lgConfigK);
90
- static int hll8ArrBytes(int lgConfigK);
86
+ static uint32_t hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK);
87
+ static uint32_t hll4ArrBytes(uint8_t lgConfigK);
88
+ static uint32_t hll6ArrBytes(uint8_t lgConfigK);
89
+ static uint32_t hll8ArrBytes(uint8_t lgConfigK);
91
90
 
92
91
  virtual AuxHashMap<A>* getAuxHashMap() const;
93
92
 
@@ -95,18 +94,20 @@ class HllArray : public HllSketchImpl<A> {
95
94
  virtual const_iterator begin(bool all = false) const;
96
95
  virtual const_iterator end() const;
97
96
 
97
+ virtual A getAllocator() const;
98
+
98
99
  protected:
99
100
  void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
100
- double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const;
101
- double getHllRawEstimate(int lgConfigK, double kxqSum) const;
101
+ double getHllBitMapEstimate() const;
102
+ double getHllRawEstimate() const;
102
103
 
103
- double hipAccum;
104
- double kxq0;
105
- double kxq1;
106
- uint8_t* hllByteArr; //init by sub-classes
107
- int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array
108
- int numAtCurMin; //interpreted as num zeros when curMin == 0
109
- bool oooFlag; //Out-Of-Order Flag
104
+ double hipAccum_;
105
+ double kxq0_;
106
+ double kxq1_;
107
+ vector_u8<A> hllByteArr_; //init by sub-classes
108
+ uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
109
+ uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
110
+ bool oooFlag_; //Out-Of-Order Flag
110
111
 
111
112
  friend class HllSketchImplFactory<A>;
112
113
  };
@@ -114,21 +115,20 @@ class HllArray : public HllSketchImpl<A> {
114
115
  template<typename A>
115
116
  class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
116
117
  public:
117
- const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
118
- //const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
118
+ const_iterator(const uint8_t* array, uint32_t array_slze, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
119
119
  const_iterator& operator++();
120
120
  bool operator!=(const const_iterator& other) const;
121
121
  uint32_t operator*() const;
122
122
  private:
123
- const uint8_t* array;
124
- size_t array_size;
125
- size_t index;
126
- target_hll_type hll_type;
127
- const AuxHashMap<A>* exceptions;
128
- uint8_t offset;
129
- bool all;
130
- uint8_t value; // cached value to avoid computing in operator++ and in operator*()
131
- static inline uint8_t get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
123
+ const uint8_t* array_;
124
+ uint32_t array_size_;
125
+ uint32_t index_;
126
+ target_hll_type hll_type_;
127
+ const AuxHashMap<A>* exceptions_;
128
+ uint8_t offset_;
129
+ bool all_;
130
+ uint8_t value_; // cached value to avoid computing in operator++ and in operator*()
131
+ static inline uint8_t get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
132
132
  };
133
133
 
134
134
  }
@@ -42,28 +42,26 @@ typedef union {
42
42
  } longDoubleUnion;
43
43
 
44
44
  template<typename A>
45
- hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) {
45
+ hll_sketch_alloc<A>::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
46
46
  HllUtil<A>::checkLgK(lg_config_k);
47
47
  if (start_full_size) {
48
- sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
48
+ sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
49
49
  } else {
50
50
  typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
51
- sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
51
+ sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
52
52
  }
53
53
  }
54
54
 
55
55
  template<typename A>
56
- hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
57
- HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is);
58
- hll_sketch_alloc<A> sketch(impl);
59
- return sketch;
56
+ hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
57
+ HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
58
+ return hll_sketch_alloc<A>(impl);
60
59
  }
61
60
 
62
61
  template<typename A>
63
- hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
64
- HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len);
65
- hll_sketch_alloc<A> sketch(impl);
66
- return sketch;
62
+ hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
63
+ HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
64
+ return hll_sketch_alloc<A>(impl);
67
65
  }
68
66
 
69
67
  template<typename A>
@@ -124,7 +122,7 @@ void hll_sketch_alloc<A>::update(const std::string& datum) {
124
122
  }
125
123
 
126
124
  template<typename A>
127
- void hll_sketch_alloc<A>::update(const uint64_t datum) {
125
+ void hll_sketch_alloc<A>::update(uint64_t datum) {
128
126
  // no sign extension with 64 bits so no need to cast to signed value
129
127
  HashState hashResult;
130
128
  HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
@@ -132,53 +130,53 @@ void hll_sketch_alloc<A>::update(const uint64_t datum) {
132
130
  }
133
131
 
134
132
  template<typename A>
135
- void hll_sketch_alloc<A>::update(const uint32_t datum) {
133
+ void hll_sketch_alloc<A>::update(uint32_t datum) {
136
134
  update(static_cast<int32_t>(datum));
137
135
  }
138
136
 
139
137
  template<typename A>
140
- void hll_sketch_alloc<A>::update(const uint16_t datum) {
138
+ void hll_sketch_alloc<A>::update(uint16_t datum) {
141
139
  update(static_cast<int16_t>(datum));
142
140
  }
143
141
 
144
142
  template<typename A>
145
- void hll_sketch_alloc<A>::update(const uint8_t datum) {
143
+ void hll_sketch_alloc<A>::update(uint8_t datum) {
146
144
  update(static_cast<int8_t>(datum));
147
145
  }
148
146
 
149
147
  template<typename A>
150
- void hll_sketch_alloc<A>::update(const int64_t datum) {
148
+ void hll_sketch_alloc<A>::update(int64_t datum) {
151
149
  HashState hashResult;
152
150
  HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
153
151
  coupon_update(HllUtil<A>::coupon(hashResult));
154
152
  }
155
153
 
156
154
  template<typename A>
157
- void hll_sketch_alloc<A>::update(const int32_t datum) {
158
- int64_t val = static_cast<int64_t>(datum);
155
+ void hll_sketch_alloc<A>::update(int32_t datum) {
156
+ const int64_t val = static_cast<int64_t>(datum);
159
157
  HashState hashResult;
160
158
  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
161
159
  coupon_update(HllUtil<A>::coupon(hashResult));
162
160
  }
163
161
 
164
162
  template<typename A>
165
- void hll_sketch_alloc<A>::update(const int16_t datum) {
166
- int64_t val = static_cast<int64_t>(datum);
163
+ void hll_sketch_alloc<A>::update(int16_t datum) {
164
+ const int64_t val = static_cast<int64_t>(datum);
167
165
  HashState hashResult;
168
166
  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
169
167
  coupon_update(HllUtil<A>::coupon(hashResult));
170
168
  }
171
169
 
172
170
  template<typename A>
173
- void hll_sketch_alloc<A>::update(const int8_t datum) {
174
- int64_t val = static_cast<int64_t>(datum);
171
+ void hll_sketch_alloc<A>::update(int8_t datum) {
172
+ const int64_t val = static_cast<int64_t>(datum);
175
173
  HashState hashResult;
176
174
  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
177
175
  coupon_update(HllUtil<A>::coupon(hashResult));
178
176
  }
179
177
 
180
178
  template<typename A>
181
- void hll_sketch_alloc<A>::update(const double datum) {
179
+ void hll_sketch_alloc<A>::update(double datum) {
182
180
  longDoubleUnion d;
183
181
  d.doubleBytes = static_cast<double>(datum);
184
182
  if (datum == 0.0) {
@@ -192,7 +190,7 @@ void hll_sketch_alloc<A>::update(const double datum) {
192
190
  }
193
191
 
194
192
  template<typename A>
195
- void hll_sketch_alloc<A>::update(const float datum) {
193
+ void hll_sketch_alloc<A>::update(float datum) {
196
194
  longDoubleUnion d;
197
195
  d.doubleBytes = static_cast<double>(datum);
198
196
  if (datum == 0.0) {
@@ -206,7 +204,7 @@ void hll_sketch_alloc<A>::update(const float datum) {
206
204
  }
207
205
 
208
206
  template<typename A>
209
- void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
207
+ void hll_sketch_alloc<A>::update(const void* data, size_t lengthBytes) {
210
208
  if (data == nullptr) { return; }
211
209
  HashState hashResult;
212
210
  HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
@@ -214,8 +212,8 @@ void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
214
212
  }
215
213
 
216
214
  template<typename A>
217
- void hll_sketch_alloc<A>::coupon_update(int coupon) {
218
- if (coupon == HllUtil<A>::EMPTY) { return; }
215
+ void hll_sketch_alloc<A>::coupon_update(uint32_t coupon) {
216
+ if (coupon == hll_constants::EMPTY) { return; }
219
217
  HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
220
218
  if (result != this->sketch_impl) {
221
219
  this->sketch_impl->get_deleter()(this->sketch_impl);
@@ -354,12 +352,12 @@ double hll_sketch_alloc<A>::get_composite_estimate() const {
354
352
  }
355
353
 
356
354
  template<typename A>
357
- double hll_sketch_alloc<A>::get_lower_bound(int numStdDev) const {
355
+ double hll_sketch_alloc<A>::get_lower_bound(uint8_t numStdDev) const {
358
356
  return sketch_impl->getLowerBound(numStdDev);
359
357
  }
360
358
 
361
359
  template<typename A>
362
- double hll_sketch_alloc<A>::get_upper_bound(int numStdDev) const {
360
+ double hll_sketch_alloc<A>::get_upper_bound(uint8_t numStdDev) const {
363
361
  return sketch_impl->getUpperBound(numStdDev);
364
362
  }
365
363
 
@@ -369,7 +367,7 @@ hll_mode hll_sketch_alloc<A>::get_current_mode() const {
369
367
  }
370
368
 
371
369
  template<typename A>
372
- int hll_sketch_alloc<A>::get_lg_config_k() const {
370
+ uint8_t hll_sketch_alloc<A>::get_lg_config_k() const {
373
371
  return sketch_impl->getLgConfigK();
374
372
  }
375
373
 
@@ -389,12 +387,12 @@ bool hll_sketch_alloc<A>::is_estimation_mode() const {
389
387
  }
390
388
 
391
389
  template<typename A>
392
- int hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
390
+ uint32_t hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
393
391
  return sketch_impl->getUpdatableSerializationBytes();
394
392
  }
395
393
 
396
394
  template<typename A>
397
- int hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
395
+ uint32_t hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
398
396
  return sketch_impl->getCompactSerializationBytes();
399
397
  }
400
398
 
@@ -437,23 +435,23 @@ std::string hll_sketch_alloc<A>::mode_as_string() const {
437
435
  }
438
436
 
439
437
  template<typename A>
440
- int hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(const int lg_config_k,
438
+ uint32_t hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(uint8_t lg_config_k,
441
439
  const target_hll_type tgtHllType) {
442
- int arrBytes;
440
+ uint32_t arrBytes;
443
441
  if (tgtHllType == target_hll_type::HLL_4) {
444
- const int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[lg_config_k];
442
+ const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k];
445
443
  arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
446
444
  } else if (tgtHllType == target_hll_type::HLL_6) {
447
445
  arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
448
446
  } else { //HLL_8
449
447
  arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
450
448
  }
451
- return HllUtil<A>::HLL_BYTE_ARR_START + arrBytes;
449
+ return hll_constants::HLL_BYTE_ARR_START + arrBytes;
452
450
  }
453
451
 
454
452
  template<typename A>
455
- double hll_sketch_alloc<A>::get_rel_err(const bool upperBound, const bool unioned,
456
- const int lg_config_k, const int numStdDev) {
453
+ double hll_sketch_alloc<A>::get_rel_err(bool upperBound, bool unioned,
454
+ uint8_t lg_config_k, uint8_t numStdDev) {
457
455
  return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
458
456
  }
459
457
 
@@ -26,12 +26,12 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- HllSketchImpl<A>::HllSketchImpl(const int lgConfigK, const target_hll_type tgtHllType,
30
- const hll_mode mode, const bool startFullSize)
31
- : lgConfigK(lgConfigK),
32
- tgtHllType(tgtHllType),
33
- mode(mode),
34
- startFullSize(startFullSize)
29
+ HllSketchImpl<A>::HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType,
30
+ hll_mode mode, bool startFullSize)
31
+ : lgConfigK_(lgConfigK),
32
+ tgtHllType_(tgtHllType),
33
+ mode_(mode),
34
+ startFullSize_(startFullSize)
35
35
  {
36
36
  }
37
37
 
@@ -40,7 +40,7 @@ HllSketchImpl<A>::~HllSketchImpl() {
40
40
  }
41
41
 
42
42
  template<typename A>
43
- target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
43
+ target_hll_type HllSketchImpl<A>::extractTgtHllType(uint8_t modeByte) {
44
44
  switch ((modeByte >> 2) & 0x3) {
45
45
  case 0:
46
46
  return target_hll_type::HLL_4;
@@ -54,7 +54,7 @@ target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
54
54
  }
55
55
 
56
56
  template<typename A>
57
- hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
57
+ hll_mode HllSketchImpl<A>::extractCurMode(uint8_t modeByte) {
58
58
  switch (modeByte & 0x3) {
59
59
  case 0:
60
60
  return hll_mode::LIST;
@@ -68,12 +68,12 @@ hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
68
68
  }
69
69
 
70
70
  template<typename A>
71
- uint8_t HllSketchImpl<A>::makeFlagsByte(const bool compact) const {
72
- uint8_t flags(0);
73
- flags |= (isEmpty() ? HllUtil<A>::EMPTY_FLAG_MASK : 0);
74
- flags |= (compact ? HllUtil<A>::COMPACT_FLAG_MASK : 0);
75
- flags |= (isOutOfOrderFlag() ? HllUtil<A>::OUT_OF_ORDER_FLAG_MASK : 0);
76
- flags |= (startFullSize ? HllUtil<A>::FULL_SIZE_FLAG_MASK : 0);
71
+ uint8_t HllSketchImpl<A>::makeFlagsByte(bool compact) const {
72
+ uint8_t flags = 0;
73
+ flags |= (isEmpty() ? hll_constants::EMPTY_FLAG_MASK : 0);
74
+ flags |= (compact ? hll_constants::COMPACT_FLAG_MASK : 0);
75
+ flags |= (isOutOfOrderFlag() ? hll_constants::OUT_OF_ORDER_FLAG_MASK : 0);
76
+ flags |= (startFullSize_ ? hll_constants::FULL_SIZE_FLAG_MASK : 0);
77
77
  return flags;
78
78
  }
79
79
 
@@ -92,7 +92,7 @@ template<typename A>
92
92
  uint8_t HllSketchImpl<A>::makeModeByte() const {
93
93
  uint8_t byte = 0;
94
94
 
95
- switch (mode) {
95
+ switch (mode_) {
96
96
  case LIST:
97
97
  byte = 0;
98
98
  break;
@@ -104,7 +104,7 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
104
104
  break;
105
105
  }
106
106
 
107
- switch (tgtHllType) {
107
+ switch (tgtHllType_) {
108
108
  case HLL_4:
109
109
  byte |= (0 << 2); // for completeness
110
110
  break;
@@ -121,27 +121,27 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
121
121
 
122
122
  template<typename A>
123
123
  HllSketchImpl<A>* HllSketchImpl<A>::reset() {
124
- return HllSketchImplFactory<A>::reset(this, startFullSize);
124
+ return HllSketchImplFactory<A>::reset(this, startFullSize_);
125
125
  }
126
126
 
127
127
  template<typename A>
128
128
  target_hll_type HllSketchImpl<A>::getTgtHllType() const {
129
- return tgtHllType;
129
+ return tgtHllType_;
130
130
  }
131
131
 
132
132
  template<typename A>
133
- int HllSketchImpl<A>::getLgConfigK() const {
134
- return lgConfigK;
133
+ uint8_t HllSketchImpl<A>::getLgConfigK() const {
134
+ return lgConfigK_;
135
135
  }
136
136
 
137
137
  template<typename A>
138
138
  hll_mode HllSketchImpl<A>::getCurMode() const {
139
- return mode;
139
+ return mode_;
140
140
  }
141
141
 
142
142
  template<typename A>
143
143
  bool HllSketchImpl<A>::isStartFullSize() const {
144
- return startFullSize;
144
+ return startFullSize_;
145
145
  }
146
146
 
147
147
  }
@@ -27,10 +27,10 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- template<typename A = std::allocator<char>>
30
+ template<typename A>
31
31
  class HllSketchImpl {
32
32
  public:
33
- HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
33
+ HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
34
34
  virtual ~HllSketchImpl();
35
35
 
36
36
  virtual void serialize(std::ostream& os, bool compact) const = 0;
@@ -42,30 +42,31 @@ class HllSketchImpl {
42
42
 
43
43
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
44
44
 
45
- virtual HllSketchImpl* couponUpdate(int coupon) = 0;
45
+ virtual HllSketchImpl* couponUpdate(uint32_t coupon) = 0;
46
46
 
47
47
  hll_mode getCurMode() const;
48
48
 
49
49
  virtual double getEstimate() const = 0;
50
50
  virtual double getCompositeEstimate() const = 0;
51
- virtual double getUpperBound(int numStdDev) const = 0;
52
- virtual double getLowerBound(int numStdDev) const = 0;
51
+ virtual double getUpperBound(uint8_t numStdDev) const = 0;
52
+ virtual double getLowerBound(uint8_t numStdDev) const = 0;
53
53
 
54
- inline int getLgConfigK() const;
54
+ inline uint8_t getLgConfigK() const;
55
55
 
56
- virtual int getMemDataStart() const = 0;
56
+ virtual uint32_t getMemDataStart() const = 0;
57
57
 
58
- virtual int getPreInts() const = 0;
58
+ virtual uint8_t getPreInts() const = 0;
59
59
 
60
60
  target_hll_type getTgtHllType() const;
61
61
 
62
- virtual int getUpdatableSerializationBytes() const = 0;
63
- virtual int getCompactSerializationBytes() const = 0;
62
+ virtual uint32_t getUpdatableSerializationBytes() const = 0;
63
+ virtual uint32_t getCompactSerializationBytes() const = 0;
64
64
 
65
65
  virtual bool isCompact() const = 0;
66
66
  virtual bool isEmpty() const = 0;
67
67
  virtual bool isOutOfOrderFlag() const = 0;
68
68
  virtual void putOutOfOrderFlag(bool oooFlag) = 0;
69
+ virtual A getAllocator() const = 0;
69
70
  bool isStartFullSize() const;
70
71
 
71
72
  protected:
@@ -74,10 +75,10 @@ class HllSketchImpl {
74
75
  uint8_t makeFlagsByte(bool compact) const;
75
76
  uint8_t makeModeByte() const;
76
77
 
77
- const int lgConfigK;
78
- const target_hll_type tgtHllType;
79
- const hll_mode mode;
80
- const bool startFullSize;
78
+ const uint8_t lgConfigK_;
79
+ const target_hll_type tgtHllType_;
80
+ const hll_mode mode_;
81
+ const bool startFullSize_;
81
82
  };
82
83
 
83
84
  }