datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -24,20 +24,20 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A>
28
28
  class CouponHashSet : public CouponList<A> {
29
29
  public:
30
- static CouponHashSet* newSet(const void* bytes, size_t len);
31
- static CouponHashSet* newSet(std::istream& is);
32
- explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
33
- explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
34
- explicit CouponHashSet(const CouponHashSet& that);
30
+ static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
31
+ static CouponHashSet* newSet(std::istream& is, const A& allocator);
32
+ CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
33
+ CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
35
34
 
36
- virtual ~CouponHashSet();
35
+ virtual ~CouponHashSet() = default;
37
36
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
37
 
39
38
  protected:
40
-
39
+ using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
40
+
41
41
  virtual CouponHashSet* copy() const;
42
42
  virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
43
43
 
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
49
49
  friend class HllSketchImplFactory<A>;
50
50
 
51
51
  private:
52
- typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
52
+ using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
53
53
  bool checkGrowOrPromote();
54
- void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
54
+ void growHashSet(int tgtLgCoupArrSize);
55
55
  };
56
56
 
57
57
  }
@@ -23,6 +23,7 @@
23
23
  #include "CouponList.hpp"
24
24
  #include "CubicInterpolation.hpp"
25
25
  #include "HllUtil.hpp"
26
+ #include "count_zeros.hpp"
26
27
 
27
28
  #include <algorithm>
28
29
  #include <cmath>
@@ -30,74 +31,45 @@
30
31
  namespace datasketches {
31
32
 
32
33
  template<typename A>
33
- CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
34
- : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
35
- if (mode == hll_mode::LIST) {
36
- lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
37
- } else { // mode == SET
38
- lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
39
- }
40
- oooFlag = false;
41
- const int arrayLen = 1 << lgCouponArrInts;
42
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
43
- couponIntArr = intAlloc().allocate(arrayLen);
44
- std::fill(couponIntArr, couponIntArr + arrayLen, 0);
45
- couponCount = 0;
46
- }
47
-
48
- template<typename A>
49
- CouponList<A>::CouponList(const CouponList& that)
50
- : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
51
- lgCouponArrInts(that.lgCouponArrInts),
52
- couponCount(that.couponCount),
53
- oooFlag(that.oooFlag) {
54
-
55
- const int numItems = 1 << lgCouponArrInts;
56
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
57
- couponIntArr = intAlloc().allocate(numItems);
58
- std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
59
- }
60
-
61
- template<typename A>
62
- CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
63
- : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
64
- lgCouponArrInts(that.lgCouponArrInts),
65
- couponCount(that.couponCount),
66
- oooFlag(that.oooFlag) {
67
-
68
- const int numItems = 1 << lgCouponArrInts;
69
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
70
- couponIntArr = intAlloc().allocate(numItems);
71
- std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
72
- }
34
+ CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
35
+ HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
36
+ couponCount(0),
37
+ oooFlag(false),
38
+ coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
39
+ {}
73
40
 
74
41
  template<typename A>
75
- CouponList<A>::~CouponList() {
76
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
77
- intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
78
- }
42
+ CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
43
+ HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
44
+ couponCount(that.couponCount),
45
+ oooFlag(that.oooFlag),
46
+ coupons(that.coupons)
47
+ {}
79
48
 
80
49
  template<typename A>
81
50
  std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
82
51
  return [](HllSketchImpl<A>* ptr) {
83
52
  CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
53
+ ClAlloc cla(cl->getAllocator());
84
54
  cl->~CouponList();
85
- clAlloc().deallocate(cl, 1);
55
+ cla.deallocate(cl, 1);
86
56
  };
87
57
  }
88
58
 
89
59
  template<typename A>
90
60
  CouponList<A>* CouponList<A>::copy() const {
91
- return new (clAlloc().allocate(1)) CouponList<A>(*this);
61
+ ClAlloc cla(coupons.get_allocator());
62
+ return new (cla.allocate(1)) CouponList<A>(*this);
92
63
  }
93
64
 
94
65
  template<typename A>
95
66
  CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
96
- return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
67
+ ClAlloc cla(coupons.get_allocator());
68
+ return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
97
69
  }
98
70
 
99
71
  template<typename A>
100
- CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
72
+ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
101
73
  if (len < HllUtil<A>::LIST_INT_ARR_START) {
102
74
  throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
103
75
  }
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
115
87
 
116
88
  hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
117
89
  if (mode != LIST) {
118
- throw std::invalid_argument("Calling set construtor with non-list mode data");
90
+ throw std::invalid_argument("Calling list constructor with non-list mode data");
119
91
  }
120
92
 
121
93
  target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
133
105
  + ", found: " + std::to_string(len));
134
106
  }
135
107
 
136
- CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
108
+ ClAlloc cla(allocator);
109
+ CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
137
110
  sketch->couponCount = couponCount;
138
111
  sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
139
112
 
140
113
  if (!emptyFlag) {
141
114
  // only need to read valid coupons, unlike in stream case
142
- std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
115
+ std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
143
116
  }
144
117
 
145
118
  return sketch;
146
119
  }
147
120
 
148
121
  template<typename A>
149
- CouponList<A>* CouponList<A>::newList(std::istream& is) {
122
+ CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
150
123
  uint8_t listHeader[8];
151
124
  is.read((char*)listHeader, 8 * sizeof(uint8_t));
152
125
 
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
162
135
 
163
136
  hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
164
137
  if (mode != LIST) {
165
- throw std::invalid_argument("Calling list construtor with non-list mode data");
138
+ throw std::invalid_argument("Calling list constructor with non-list mode data");
166
139
  }
167
140
 
168
141
  const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
172
145
  const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
173
146
  const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
174
147
 
175
- CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
176
- typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
148
+ ClAlloc cla(allocator);
149
+ CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
150
+ using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
177
151
  coupon_list_ptr ptr(sketch, sketch->get_deleter());
178
152
  const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
179
153
  sketch->couponCount = couponCount;
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
183
157
  // For stream processing, need to read entire number written to stream so read
184
158
  // pointer ends up set correctly.
185
159
  // If not compact, still need to read empty items even though in order.
186
- const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
187
- is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
160
+ const int numToRead = (compact ? couponCount : sketch->coupons.size());
161
+ is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
188
162
  }
189
163
 
190
164
  if (!is.good())
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
196
170
  template<typename A>
197
171
  vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
198
172
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
199
- vector_u8<A> byteArr(sketchSizeBytes);
173
+ vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
200
174
  uint8_t* bytes = byteArr.data() + header_size_bytes;
201
175
 
202
176
  bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
203
177
  bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
204
178
  bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
205
179
  bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
206
- bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
180
+ bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
207
181
  bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
208
182
  bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
209
183
  bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
217
191
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
218
192
  switch (sw) {
219
193
  case 0: { // src updatable, dst updatable
220
- std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
194
+ std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
221
195
  break;
222
196
  }
223
197
  case 1: { // src updatable, dst compact
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
247
221
  os.write((char*)&familyId, sizeof(familyId));
248
222
  const uint8_t lgKByte((uint8_t) this->lgConfigK);
249
223
  os.write((char*)&lgKByte, sizeof(lgKByte));
250
- const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
224
+ const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
251
225
  os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
252
226
  const uint8_t flagsByte(this->makeFlagsByte(compact));
253
227
  os.write((char*)&flagsByte, sizeof(flagsByte));
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
273
247
  const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
274
248
  switch (sw) {
275
249
  case 0: { // src updatable, dst updatable
276
- os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
250
+ os.write((char*)coupons.data(), coupons.size() * sizeof(int));
277
251
  break;
278
252
  }
279
253
  case 1: { // src updatable, dst compact
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
292
266
 
293
267
  template<typename A>
294
268
  HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
295
- const int len = 1 << lgCouponArrInts;
296
- for (int i = 0; i < len; ++i) { // search for empty slot
297
- const int couponAtIdx = couponIntArr[i];
269
+ for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
270
+ const int couponAtIdx = coupons[i];
298
271
  if (couponAtIdx == HllUtil<A>::EMPTY) {
299
- couponIntArr[i] = coupon; // the actual update
272
+ coupons[i] = coupon; // the actual update
300
273
  ++couponCount;
301
- if (couponCount >= len) { // array full
274
+ if (couponCount == static_cast<int>(coupons.size())) { // array full
302
275
  if (this->lgConfigK < 8) {
303
276
  return promoteHeapListOrSetToHll(*this);
304
277
  }
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
348
321
 
349
322
  template<typename A>
350
323
  int CouponList<A>::getUpdatableSerializationBytes() const {
351
- return getMemDataStart() + (4 << getLgCouponArrInts());
324
+ return getMemDataStart() + coupons.size() * sizeof(int);
352
325
  }
353
326
 
354
327
  template<typename A>
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
383
356
  }
384
357
 
385
358
  template<typename A>
386
- int CouponList<A>::getLgCouponArrInts() const {
387
- return lgCouponArrInts;
388
- }
389
-
390
- template<typename A>
391
- int* CouponList<A>::getCouponIntArr() const {
392
- return couponIntArr;
359
+ A CouponList<A>::getAllocator() const {
360
+ return coupons.get_allocator();
393
361
  }
394
362
 
395
363
  template<typename A>
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
404
372
 
405
373
  template<typename A>
406
374
  coupon_iterator<A> CouponList<A>::begin(bool all) const {
407
- return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
375
+ return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
408
376
  }
409
377
 
410
378
  template<typename A>
411
379
  coupon_iterator<A> CouponList<A>::end() const {
412
- return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
380
+ return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
413
381
  }
414
382
 
415
383
  }
@@ -30,19 +30,18 @@ namespace datasketches {
30
30
  template<typename A>
31
31
  class HllSketchImplFactory;
32
32
 
33
- template<typename A = std::allocator<char>>
33
+ template<typename A>
34
34
  class CouponList : public HllSketchImpl<A> {
35
35
  public:
36
- explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode);
37
- explicit CouponList(const CouponList& that);
38
- explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
36
+ CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
37
+ CouponList(const CouponList& that, target_hll_type tgtHllType);
39
38
 
40
- static CouponList* newList(const void* bytes, size_t len);
41
- static CouponList* newList(std::istream& is);
39
+ static CouponList* newList(const void* bytes, size_t len, const A& allocator);
40
+ static CouponList* newList(std::istream& is, const A& allocator);
42
41
  virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
43
42
  virtual void serialize(std::ostream& os, bool compact) const;
44
43
 
45
- virtual ~CouponList();
44
+ virtual ~CouponList() = default;
46
45
  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
47
46
 
48
47
  virtual CouponList* copy() const;
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
62
61
  coupon_iterator<A> end() const;
63
62
 
64
63
  protected:
65
- typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
64
+ using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
65
+
66
+ using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
67
 
67
68
  HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
68
69
  HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
75
76
  virtual bool isOutOfOrderFlag() const;
76
77
  virtual void putOutOfOrderFlag(bool oooFlag);
77
78
 
78
- virtual int getLgCouponArrInts() const;
79
- virtual int* getCouponIntArr() const;
79
+ virtual A getAllocator() const;
80
80
 
81
- int lgCouponArrInts;
82
81
  int couponCount;
83
82
  bool oooFlag;
84
- int* couponIntArr;
83
+ vector_int coupons;
85
84
 
86
85
  friend class HllSketchImplFactory<A>;
87
86
  };
@@ -24,7 +24,7 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A = std::allocator<uint8_t>>
28
28
  class CubicInterpolation {
29
29
  public:
30
30
  static double usingXAndYTables(const double xArr[], const double yArr[],
@@ -40,4 +40,4 @@ class CubicInterpolation {
40
40
 
41
41
  #include "CubicInterpolation-internal.hpp"
42
42
 
43
- #endif /* _CUBICINTERPOLATION_HPP_ */
43
+ #endif /* _CUBICINTERPOLATION_HPP_ */
@@ -25,7 +25,7 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- template<typename A = std::allocator<char>>
28
+ template<typename A = std::allocator<uint8_t>>
29
29
  class HarmonicNumbers {
30
30
  public:
31
31
  /**
@@ -45,4 +45,4 @@ class HarmonicNumbers {
45
45
 
46
46
  #include "HarmonicNumbers-internal.hpp"
47
47
 
48
- #endif /* _HARMONICNUMBERS_HPP_ */
48
+ #endif /* _HARMONICNUMBERS_HPP_ */
@@ -30,13 +30,12 @@
30
30
  namespace datasketches {
31
31
 
32
32
  template<typename A>
33
- Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) :
34
- HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) {
33
+ Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
34
+ HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
35
+ auxHashMap(nullptr)
36
+ {
35
37
  const int numBytes = this->hll4ArrBytes(lgConfigK);
36
- typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
37
- this->hllByteArr = uint8Alloc().allocate(numBytes);
38
- std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
39
- auxHashMap = nullptr;
38
+ this->hllByteArr.resize(numBytes, 0);
40
39
  }
41
40
 
42
41
  template<typename A>
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
63
62
  template<typename A>
64
63
  std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
65
64
  return [](HllSketchImpl<A>* ptr) {
66
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
67
65
  Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
66
+ using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
67
+ Hll4Alloc hll4Alloc(hll->getAllocator());
68
68
  hll->~Hll4Array();
69
- hll4Alloc().deallocate(hll, 1);
69
+ hll4Alloc.deallocate(hll, 1);
70
70
  };
71
71
  }
72
72
 
73
73
  template<typename A>
74
74
  Hll4Array<A>* Hll4Array<A>::copy() const {
75
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
76
- return new (hll4Alloc().allocate(1)) Hll4Array<A>(*this);
75
+ using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
76
+ Hll4Alloc hll4Alloc(this->getAllocator());
77
+ return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
77
78
  }
78
79
 
79
80
  template<typename A>
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
195
196
  // added to the exception table
196
197
  putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
197
198
  if (auxHashMap == nullptr) {
198
- auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
199
+ auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
199
200
  }
200
201
  auxHashMap->mustAdd(slotNo, newVal);
201
202
  }
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
285
286
  } else { //newShiftedVal >= AUX_TOKEN
286
287
  // the former exception remains an exception, so must be added to the newAuxMap
287
288
  if (newAuxMap == nullptr) {
288
- newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
289
+ newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
289
290
  }
290
291
  newAuxMap->mustAdd(slotNum, oldActualVal);
291
292
  }
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
315
316
 
316
317
  template<typename A>
317
318
  typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
318
- return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
319
+ return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
319
320
  }
320
321
 
321
322
  template<typename A>
322
323
  typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
323
- return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
324
+ return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
324
325
  }
325
326
 
326
327
  template<typename A>