datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -24,20 +24,20 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A>
28
28
  class CouponHashSet : public CouponList<A> {
29
29
  public:
30
- static CouponHashSet* newSet(const void* bytes, size_t len);
31
- static CouponHashSet* newSet(std::istream& is);
32
- explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
33
- explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
34
- explicit CouponHashSet(const CouponHashSet& that);
30
+ static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
31
+ static CouponHashSet* newSet(std::istream& is, const A& allocator);
32
+ CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
33
+ CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
35
34
 
36
- virtual ~CouponHashSet();
35
+ virtual ~CouponHashSet() = default;
37
36
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
37
 
39
38
  protected:
40
-
39
+ using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
40
+
41
41
  virtual CouponHashSet* copy() const;
42
42
  virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
43
43
 
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
49
49
  friend class HllSketchImplFactory<A>;
50
50
 
51
51
  private:
52
- typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
52
+ using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
53
53
  bool checkGrowOrPromote();
54
- void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
54
+ void growHashSet(int tgtLgCoupArrSize);
55
55
  };
56
56
 
57
57
  }
@@ -23,6 +23,7 @@
23
23
  #include "CouponList.hpp"
24
24
  #include "CubicInterpolation.hpp"
25
25
  #include "HllUtil.hpp"
26
+ #include "count_zeros.hpp"
26
27
 
27
28
  #include <algorithm>
28
29
  #include <cmath>
@@ -30,74 +31,45 @@
30
31
  namespace datasketches {
31
32
 
32
33
  template<typename A>
33
- CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
34
- : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
35
- if (mode == hll_mode::LIST) {
36
- lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
37
- } else { // mode == SET
38
- lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
39
- }
40
- oooFlag = false;
41
- const int arrayLen = 1 << lgCouponArrInts;
42
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
43
- couponIntArr = intAlloc().allocate(arrayLen);
44
- std::fill(couponIntArr, couponIntArr + arrayLen, 0);
45
- couponCount = 0;
46
- }
47
-
48
- template<typename A>
49
- CouponList<A>::CouponList(const CouponList& that)
50
- : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
51
- lgCouponArrInts(that.lgCouponArrInts),
52
- couponCount(that.couponCount),
53
- oooFlag(that.oooFlag) {
54
-
55
- const int numItems = 1 << lgCouponArrInts;
56
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
57
- couponIntArr = intAlloc().allocate(numItems);
58
- std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
59
- }
60
-
61
- template<typename A>
62
- CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
63
- : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
64
- lgCouponArrInts(that.lgCouponArrInts),
65
- couponCount(that.couponCount),
66
- oooFlag(that.oooFlag) {
67
-
68
- const int numItems = 1 << lgCouponArrInts;
69
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
70
- couponIntArr = intAlloc().allocate(numItems);
71
- std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
72
- }
34
+ CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
35
+ HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
36
+ couponCount(0),
37
+ oooFlag(false),
38
+ coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
39
+ {}
73
40
 
74
41
  template<typename A>
75
- CouponList<A>::~CouponList() {
76
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
77
- intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
78
- }
42
+ CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
43
+ HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
44
+ couponCount(that.couponCount),
45
+ oooFlag(that.oooFlag),
46
+ coupons(that.coupons)
47
+ {}
79
48
 
80
49
  template<typename A>
81
50
  std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
82
51
  return [](HllSketchImpl<A>* ptr) {
83
52
  CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
53
+ ClAlloc cla(cl->getAllocator());
84
54
  cl->~CouponList();
85
- clAlloc().deallocate(cl, 1);
55
+ cla.deallocate(cl, 1);
86
56
  };
87
57
  }
88
58
 
89
59
  template<typename A>
90
60
  CouponList<A>* CouponList<A>::copy() const {
91
- return new (clAlloc().allocate(1)) CouponList<A>(*this);
61
+ ClAlloc cla(coupons.get_allocator());
62
+ return new (cla.allocate(1)) CouponList<A>(*this);
92
63
  }
93
64
 
94
65
  template<typename A>
95
66
  CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
96
- return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
67
+ ClAlloc cla(coupons.get_allocator());
68
+ return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
97
69
  }
98
70
 
99
71
  template<typename A>
100
- CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
72
+ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
101
73
  if (len < HllUtil<A>::LIST_INT_ARR_START) {
102
74
  throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
103
75
  }
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
115
87
 
116
88
  hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
117
89
  if (mode != LIST) {
118
- throw std::invalid_argument("Calling set construtor with non-list mode data");
90
+ throw std::invalid_argument("Calling list constructor with non-list mode data");
119
91
  }
120
92
 
121
93
  target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
133
105
  + ", found: " + std::to_string(len));
134
106
  }
135
107
 
136
- CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
108
+ ClAlloc cla(allocator);
109
+ CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
137
110
  sketch->couponCount = couponCount;
138
111
  sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
139
112
 
140
113
  if (!emptyFlag) {
141
114
  // only need to read valid coupons, unlike in stream case
142
- std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
115
+ std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
143
116
  }
144
117
 
145
118
  return sketch;
146
119
  }
147
120
 
148
121
  template<typename A>
149
- CouponList<A>* CouponList<A>::newList(std::istream& is) {
122
+ CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
150
123
  uint8_t listHeader[8];
151
124
  is.read((char*)listHeader, 8 * sizeof(uint8_t));
152
125
 
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
162
135
 
163
136
  hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
164
137
  if (mode != LIST) {
165
- throw std::invalid_argument("Calling list construtor with non-list mode data");
138
+ throw std::invalid_argument("Calling list constructor with non-list mode data");
166
139
  }
167
140
 
168
141
  const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
172
145
  const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
173
146
  const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
174
147
 
175
- CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
176
- typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
148
+ ClAlloc cla(allocator);
149
+ CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
150
+ using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
177
151
  coupon_list_ptr ptr(sketch, sketch->get_deleter());
178
152
  const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
179
153
  sketch->couponCount = couponCount;
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
183
157
  // For stream processing, need to read entire number written to stream so read
184
158
  // pointer ends up set correctly.
185
159
  // If not compact, still need to read empty items even though in order.
186
- const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
187
- is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
160
+ const int numToRead = (compact ? couponCount : sketch->coupons.size());
161
+ is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
188
162
  }
189
163
 
190
164
  if (!is.good())
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
196
170
  template<typename A>
197
171
  vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
198
172
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
199
- vector_u8<A> byteArr(sketchSizeBytes);
173
+ vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
200
174
  uint8_t* bytes = byteArr.data() + header_size_bytes;
201
175
 
202
176
  bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
203
177
  bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
204
178
  bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
205
179
  bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
206
- bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
180
+ bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
207
181
  bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
208
182
  bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
209
183
  bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
217
191
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
218
192
  switch (sw) {
219
193
  case 0: { // src updatable, dst updatable
220
- std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
194
+ std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
221
195
  break;
222
196
  }
223
197
  case 1: { // src updatable, dst compact
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
247
221
  os.write((char*)&familyId, sizeof(familyId));
248
222
  const uint8_t lgKByte((uint8_t) this->lgConfigK);
249
223
  os.write((char*)&lgKByte, sizeof(lgKByte));
250
- const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
224
+ const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
251
225
  os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
252
226
  const uint8_t flagsByte(this->makeFlagsByte(compact));
253
227
  os.write((char*)&flagsByte, sizeof(flagsByte));
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
273
247
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
274
248
  switch (sw) {
275
249
  case 0: { // src updatable, dst updatable
276
- os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
250
+ os.write((char*)coupons.data(), coupons.size() * sizeof(int));
277
251
  break;
278
252
  }
279
253
  case 1: { // src updatable, dst compact
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
292
266
 
293
267
  template<typename A>
294
268
  HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
295
- const int len = 1 << lgCouponArrInts;
296
- for (int i = 0; i < len; ++i) { // search for empty slot
297
- const int couponAtIdx = couponIntArr[i];
269
+ for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
270
+ const int couponAtIdx = coupons[i];
298
271
  if (couponAtIdx == HllUtil<A>::EMPTY) {
299
- couponIntArr[i] = coupon; // the actual update
272
+ coupons[i] = coupon; // the actual update
300
273
  ++couponCount;
301
- if (couponCount >= len) { // array full
274
+ if (couponCount == static_cast<int>(coupons.size())) { // array full
302
275
  if (this->lgConfigK < 8) {
303
276
  return promoteHeapListOrSetToHll(*this);
304
277
  }
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
348
321
 
349
322
  template<typename A>
350
323
  int CouponList<A>::getUpdatableSerializationBytes() const {
351
- return getMemDataStart() + (4 << getLgCouponArrInts());
324
+ return getMemDataStart() + coupons.size() * sizeof(int);
352
325
  }
353
326
 
354
327
  template<typename A>
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
383
356
  }
384
357
 
385
358
  template<typename A>
386
- int CouponList<A>::getLgCouponArrInts() const {
387
- return lgCouponArrInts;
388
- }
389
-
390
- template<typename A>
391
- int* CouponList<A>::getCouponIntArr() const {
392
- return couponIntArr;
359
+ A CouponList<A>::getAllocator() const {
360
+ return coupons.get_allocator();
393
361
  }
394
362
 
395
363
  template<typename A>
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
404
372
 
405
373
  template<typename A>
406
374
  coupon_iterator<A> CouponList<A>::begin(bool all) const {
407
- return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
375
+ return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
408
376
  }
409
377
 
410
378
  template<typename A>
411
379
  coupon_iterator<A> CouponList<A>::end() const {
412
- return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
380
+ return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
413
381
  }
414
382
 
415
383
  }
@@ -30,19 +30,18 @@ namespace datasketches {
30
30
  template<typename A>
31
31
  class HllSketchImplFactory;
32
32
 
33
- template<typename A = std::allocator<char>>
33
+ template<typename A>
34
34
  class CouponList : public HllSketchImpl<A> {
35
35
  public:
36
- explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode);
37
- explicit CouponList(const CouponList& that);
38
- explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
36
+ CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
37
+ CouponList(const CouponList& that, target_hll_type tgtHllType);
39
38
 
40
- static CouponList* newList(const void* bytes, size_t len);
41
- static CouponList* newList(std::istream& is);
39
+ static CouponList* newList(const void* bytes, size_t len, const A& allocator);
40
+ static CouponList* newList(std::istream& is, const A& allocator);
42
41
  virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
43
42
  virtual void serialize(std::ostream& os, bool compact) const;
44
43
 
45
- virtual ~CouponList();
44
+ virtual ~CouponList() = default;
46
45
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
47
46
 
48
47
  virtual CouponList* copy() const;
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
62
61
  coupon_iterator<A> end() const;
63
62
 
64
63
  protected:
65
- typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
64
+ using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
65
+
66
+ using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
67
 
67
68
  HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
68
69
  HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
75
76
  virtual bool isOutOfOrderFlag() const;
76
77
  virtual void putOutOfOrderFlag(bool oooFlag);
77
78
 
78
- virtual int getLgCouponArrInts() const;
79
- virtual int* getCouponIntArr() const;
79
+ virtual A getAllocator() const;
80
80
 
81
- int lgCouponArrInts;
82
81
  int couponCount;
83
82
  bool oooFlag;
84
- int* couponIntArr;
83
+ vector_int coupons;
85
84
 
86
85
  friend class HllSketchImplFactory<A>;
87
86
  };
@@ -24,7 +24,7 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A = std::allocator<uint8_t>>
28
28
  class CubicInterpolation {
29
29
  public:
30
30
  static double usingXAndYTables(const double xArr[], const double yArr[],
@@ -40,4 +40,4 @@ class CubicInterpolation {
40
40
 
41
41
  #include "CubicInterpolation-internal.hpp"
42
42
 
43
- #endif /* _CUBICINTERPOLATION_HPP_ */
43
+ #endif /* _CUBICINTERPOLATION_HPP_ */
@@ -25,7 +25,7 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- template<typename A = std::allocator<char>>
28
+ template<typename A = std::allocator<uint8_t>>
29
29
  class HarmonicNumbers {
30
30
  public:
31
31
  /**
@@ -45,4 +45,4 @@ class HarmonicNumbers {
45
45
 
46
46
  #include "HarmonicNumbers-internal.hpp"
47
47
 
48
- #endif /* _HARMONICNUMBERS_HPP_ */
48
+ #endif /* _HARMONICNUMBERS_HPP_ */
@@ -30,13 +30,12 @@
30
30
  namespace datasketches {
31
31
 
32
32
  template<typename A>
33
- Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) :
34
- HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) {
33
+ Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
34
+ HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
35
+ auxHashMap(nullptr)
36
+ {
35
37
  const int numBytes = this->hll4ArrBytes(lgConfigK);
36
- typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
37
- this->hllByteArr = uint8Alloc().allocate(numBytes);
38
- std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
39
- auxHashMap = nullptr;
38
+ this->hllByteArr.resize(numBytes, 0);
40
39
  }
41
40
 
42
41
  template<typename A>
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
63
62
  template<typename A>
64
63
  std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
65
64
  return [](HllSketchImpl<A>* ptr) {
66
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
67
65
  Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
66
+ using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
67
+ Hll4Alloc hll4Alloc(hll->getAllocator());
68
68
  hll->~Hll4Array();
69
- hll4Alloc().deallocate(hll, 1);
69
+ hll4Alloc.deallocate(hll, 1);
70
70
  };
71
71
  }
72
72
 
73
73
  template<typename A>
74
74
  Hll4Array<A>* Hll4Array<A>::copy() const {
75
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
76
- return new (hll4Alloc().allocate(1)) Hll4Array<A>(*this);
75
+ using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
76
+ Hll4Alloc hll4Alloc(this->getAllocator());
77
+ return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
77
78
  }
78
79
 
79
80
  template<typename A>
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
195
196
  // added to the exception table
196
197
  putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
197
198
  if (auxHashMap == nullptr) {
198
- auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
199
+ auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
199
200
  }
200
201
  auxHashMap->mustAdd(slotNo, newVal);
201
202
  }
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
285
286
  } else { //newShiftedVal >= AUX_TOKEN
286
287
  // the former exception remains an exception, so must be added to the newAuxMap
287
288
  if (newAuxMap == nullptr) {
288
- newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
289
+ newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
289
290
  }
290
291
  newAuxMap->mustAdd(slotNum, oldActualVal);
291
292
  }
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
315
316
 
316
317
  template<typename A>
317
318
  typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
318
- return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
319
+ return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
319
320
  }
320
321
 
321
322
  template<typename A>
322
323
  typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
323
- return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
324
+ return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
324
325
  }
325
326
 
326
327
  template<typename A>