datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -62,7 +62,7 @@ class var_opt_sketch {
62
62
  static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
63
  static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
64
64
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR);
65
+ explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
66
66
  var_opt_sketch(const var_opt_sketch& other);
67
67
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
68
 
@@ -167,7 +167,7 @@ class var_opt_sketch {
167
167
  * @param is input stream
168
168
  * @return an instance of a sketch
169
169
  */
170
- static var_opt_sketch deserialize(std::istream& is);
170
+ static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
171
171
 
172
172
  /**
173
173
  * This method deserializes a sketch from a given array of bytes.
@@ -175,7 +175,7 @@ class var_opt_sketch {
175
175
  * @param size the size of the array
176
176
  * @return an instance of a sketch
177
177
  */
178
- static var_opt_sketch deserialize(const void* bytes, size_t size);
178
+ static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
179
179
 
180
180
  /**
181
181
  * Prints a summary of the sketch.
@@ -226,8 +226,9 @@ class var_opt_sketch {
226
226
  resize_factor rf_; // resize factor
227
227
 
228
228
  uint32_t curr_items_alloc_; // currently allocated array size
229
- bool filled_data_; // true if we've explciitly set all entries in data_
229
+ bool filled_data_; // true if we've explicitly set all entries in data_
230
230
 
231
+ A allocator_;
231
232
  T* data_; // stored sampled items
232
233
  double* weights_; // weights for sampled items
233
234
 
@@ -249,20 +250,20 @@ class var_opt_sketch {
249
250
  // occurs and is properly tracked.
250
251
  bool* marks_;
251
252
 
252
- // used during deserialization to avoid memork leaks upon errors
253
+ // used during deserialization to avoid memory leaks upon errors
253
254
  class items_deleter;
254
255
  class weights_deleter;
255
256
  class marks_deleter;
256
257
 
257
- var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget);
258
+ var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator);
258
259
  var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
259
260
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
260
261
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
261
- std::unique_ptr<bool, marks_deleter> marks);
262
+ std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
262
263
 
263
264
  friend class var_opt_union<T,S,A>;
264
265
  var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
265
- var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r);
266
+ var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator);
266
267
 
267
268
  string<A> items_to_string(bool print_gap) const;
268
269
 
@@ -353,7 +354,7 @@ private:
353
354
  double r_item_wt_;
354
355
  size_t idx_;
355
356
  const size_t final_idx_;
356
- bool weight_correction_;
357
+ // bool weight_correction_;
357
358
  };
358
359
 
359
360
  // non-const iterator for internal use
@@ -42,8 +42,8 @@ namespace datasketches {
42
42
  * author Jon Malkin
43
43
  */
44
44
  template<typename T, typename S, typename A>
45
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf) :
46
- var_opt_sketch<T,S,A>(k, rf, false) {}
45
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
46
+ var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
47
47
 
48
48
  template<typename T, typename S, typename A>
49
49
  var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
@@ -56,12 +56,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
56
56
  rf_(other.rf_),
57
57
  curr_items_alloc_(other.curr_items_alloc_),
58
58
  filled_data_(other.filled_data_),
59
+ allocator_(other.allocator_),
59
60
  data_(nullptr),
60
61
  weights_(nullptr),
61
62
  num_marks_in_h_(other.num_marks_in_h_),
62
63
  marks_(nullptr)
63
64
  {
64
- data_ = A().allocate(curr_items_alloc_);
65
+ data_ = allocator_.allocate(curr_items_alloc_);
65
66
  // skip gap or anything unused at the end
66
67
  for (size_t i = 0; i < h_; ++i)
67
68
  new (&data_[i]) T(other.data_[i]);
@@ -71,13 +72,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
71
72
  // we skipped the gap
72
73
  filled_data_ = false;
73
74
 
74
- weights_ = AllocDouble().allocate(curr_items_alloc_);
75
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
75
76
  // doubles so can successfully copy regardless of the internal state
76
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
77
-
77
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
78
+
78
79
  if (other.marks_ != nullptr) {
79
- marks_ = AllocBool().allocate(curr_items_alloc_);
80
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
80
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
81
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
81
82
  }
82
83
  }
83
84
 
@@ -92,12 +93,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
92
93
  rf_(other.rf_),
93
94
  curr_items_alloc_(other.curr_items_alloc_),
94
95
  filled_data_(other.filled_data_),
96
+ allocator_(other.allocator_),
95
97
  data_(nullptr),
96
98
  weights_(nullptr),
97
99
  num_marks_in_h_(other.num_marks_in_h_),
98
100
  marks_(nullptr)
99
101
  {
100
- data_ = A().allocate(curr_items_alloc_);
102
+ data_ = allocator_.allocate(curr_items_alloc_);
101
103
  // skip gap or anything unused at the end
102
104
  for (size_t i = 0; i < h_; ++i)
103
105
  new (&data_[i]) T(other.data_[i]);
@@ -107,19 +109,19 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
107
109
  // we skipped the gap
108
110
  filled_data_ = false;
109
111
 
110
- weights_ = AllocDouble().allocate(curr_items_alloc_);
112
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
111
113
  // doubles so can successfully copy regardless of the internal state
112
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
114
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
113
115
 
114
116
  if (!as_sketch && other.marks_ != nullptr) {
115
- marks_ = AllocBool().allocate(curr_items_alloc_);
116
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
117
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
118
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
117
119
  }
118
120
  }
119
121
 
120
122
  template<typename T, typename S, typename A>
121
123
  var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
122
- uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r) :
124
+ uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
123
125
  k_(k),
124
126
  h_(h_count),
125
127
  m_(0),
@@ -129,6 +131,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
129
131
  rf_(DEFAULT_RESIZE_FACTOR),
130
132
  curr_items_alloc_(len),
131
133
  filled_data_(n > k),
134
+ allocator_(allocator),
132
135
  data_(data),
133
136
  weights_(weights),
134
137
  num_marks_in_h_(0),
@@ -146,6 +149,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
146
149
  rf_(other.rf_),
147
150
  curr_items_alloc_(other.curr_items_alloc_),
148
151
  filled_data_(other.filled_data_),
152
+ allocator_(other.allocator_),
149
153
  data_(other.data_),
150
154
  weights_(other.weights_),
151
155
  num_marks_in_h_(other.num_marks_in_h_),
@@ -157,8 +161,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
157
161
  }
158
162
 
159
163
  template<typename T, typename S, typename A>
160
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget) :
161
- k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf) {
164
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
165
+ k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
162
166
  if (k == 0 || k_ > MAX_K) {
163
167
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
164
168
  }
@@ -178,7 +182,7 @@ template<typename T, typename S, typename A>
178
182
  var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
179
183
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
180
184
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
181
- std::unique_ptr<bool, marks_deleter> marks) :
185
+ std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
182
186
  k_(k),
183
187
  h_(h),
184
188
  m_(m),
@@ -188,6 +192,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
188
192
  rf_(rf),
189
193
  curr_items_alloc_(curr_items_alloc),
190
194
  filled_data_(filled_data),
195
+ allocator_(allocator),
191
196
  data_(items.release()),
192
197
  weights_(weights.release()),
193
198
  num_marks_in_h_(num_marks_in_h),
@@ -202,27 +207,27 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
202
207
  // destroy everything
203
208
  const size_t num_to_destroy = std::min(k_ + 1, curr_items_alloc_);
204
209
  for (size_t i = 0; i < num_to_destroy; ++i) {
205
- A().destroy(data_ + i);
210
+ allocator_.destroy(data_ + i);
206
211
  }
207
212
  } else {
208
213
  // skip gap or anything unused at the end
209
214
  for (size_t i = 0; i < h_; ++i) {
210
- A().destroy(data_+ i);
215
+ allocator_.destroy(data_+ i);
211
216
  }
212
217
 
213
218
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i) {
214
- A().destroy(data_ + i);
219
+ allocator_.destroy(data_ + i);
215
220
  }
216
221
  }
217
- A().deallocate(data_, curr_items_alloc_);
222
+ allocator_.deallocate(data_, curr_items_alloc_);
218
223
  }
219
224
 
220
225
  if (weights_ != nullptr) {
221
- AllocDouble().deallocate(weights_, curr_items_alloc_);
226
+ AllocDouble(allocator_).deallocate(weights_, curr_items_alloc_);
222
227
  }
223
228
 
224
229
  if (marks_ != nullptr) {
225
- AllocBool().deallocate(marks_, curr_items_alloc_);
230
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
226
231
  }
227
232
  }
228
233
 
@@ -238,6 +243,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
238
243
  std::swap(rf_, sk_copy.rf_);
239
244
  std::swap(curr_items_alloc_, sk_copy.curr_items_alloc_);
240
245
  std::swap(filled_data_, sk_copy.filled_data_);
246
+ std::swap(allocator_, sk_copy.allocator_);
241
247
  std::swap(data_, sk_copy.data_);
242
248
  std::swap(weights_, sk_copy.weights_);
243
249
  std::swap(num_marks_in_h_, sk_copy.num_marks_in_h_);
@@ -256,6 +262,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
256
262
  std::swap(rf_, other.rf_);
257
263
  std::swap(curr_items_alloc_, other.curr_items_alloc_);
258
264
  std::swap(filled_data_, other.filled_data_);
265
+ std::swap(allocator_, other.allocator_);
259
266
  std::swap(data_, other.data_);
260
267
  std::swap(weights_, other.weights_);
261
268
  std::swap(num_marks_in_h_, other.num_marks_in_h_);
@@ -335,7 +342,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
335
342
  template<typename T, typename S, typename A>
336
343
  std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
337
344
  const size_t size = header_size_bytes + get_serialized_size_bytes();
338
- std::vector<uint8_t, AllocU8<A>> bytes(size);
345
+ std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
339
346
  uint8_t* ptr = bytes.data() + header_size_bytes;
340
347
  uint8_t* end_ptr = ptr + size;
341
348
 
@@ -468,7 +475,7 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
468
475
  }
469
476
 
470
477
  template<typename T, typename S, typename A>
471
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size) {
478
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
472
479
  ensure_minimum_memory(size, 8);
473
480
  const char* ptr = static_cast<const char*>(bytes);
474
481
  const char* base = ptr;
@@ -494,7 +501,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
494
501
  const bool is_gadget = flags & GADGET_FLAG_MASK;
495
502
 
496
503
  if (is_empty) {
497
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
504
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
498
505
  }
499
506
 
500
507
  // second and third prelongs
@@ -520,7 +527,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
520
527
 
521
528
  // read the first h_ weights, fill in rest of array with -1.0
522
529
  check_memory_size(ptr - base + (h * sizeof(double)), size);
523
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
530
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
531
+ weights_deleter(array_size, allocator));
524
532
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
525
533
  ptr += copy_from_mem(ptr, wts, h * sizeof(double));
526
534
  for (size_t i = 0; i < h; ++i) {
@@ -528,14 +536,14 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
528
536
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
529
537
  }
530
538
  }
531
- std::fill(&wts[h], &wts[array_size], -1.0);
539
+ std::fill(wts + h, wts + array_size, -1.0);
532
540
 
533
541
  // read the first h_ marks as packed bytes iff we have a gadget
534
542
  uint32_t num_marks_in_h = 0;
535
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
543
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
536
544
  if (is_gadget) {
537
545
  uint8_t val = 0;
538
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
546
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
539
547
  const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
540
548
  check_memory_size(ptr - base + size_marks, size);
541
549
  for (uint32_t i = 0; i < h; ++i) {
@@ -548,8 +556,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
548
556
  }
549
557
 
550
558
  // read the sample items, skipping the gap. Either h_ or r_ may be 0
551
- items_deleter deleter(array_size);
552
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
559
+ items_deleter deleter(array_size, allocator);
560
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
553
561
 
554
562
  ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
555
563
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
@@ -558,11 +566,11 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
558
566
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
559
567
 
560
568
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
561
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
569
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
562
570
  }
563
571
 
564
572
  template<typename T, typename S, typename A>
565
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
573
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
566
574
  uint8_t first_byte;
567
575
  is.read((char*)&first_byte, sizeof(first_byte));
568
576
  uint8_t preamble_longs = first_byte & 0x3f;
@@ -586,7 +594,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
586
594
  if (!is.good())
587
595
  throw std::runtime_error("error reading from std::istream");
588
596
  else
589
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
597
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
590
598
  }
591
599
 
592
600
  // second and third prelongs
@@ -611,7 +619,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
611
619
  }
612
620
 
613
621
  // read the first h weights, fill remainder with -1.0
614
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
622
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
623
+ weights_deleter(array_size, allocator));
615
624
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
616
625
  is.read((char*)wts, h * sizeof(double));
617
626
  for (size_t i = 0; i < h; ++i) {
@@ -619,13 +628,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
619
628
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
620
629
  }
621
630
  }
622
- std::fill(&wts[h], &wts[array_size], -1.0);
631
+ std::fill(wts + h, wts + array_size, -1.0);
623
632
 
624
633
  // read the first h_ marks as packed bytes iff we have a gadget
625
634
  uint32_t num_marks_in_h = 0;
626
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
635
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
627
636
  if (is_gadget) {
628
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
637
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
629
638
  uint8_t val = 0;
630
639
  for (uint32_t i = 0; i < h; ++i) {
631
640
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
@@ -637,12 +646,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
637
646
  }
638
647
 
639
648
  // read the sample items, skipping the gap. Either h or r may be 0
640
- items_deleter deleter(array_size);
641
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
642
-
649
+ items_deleter deleter(array_size, allocator);
650
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
651
+
643
652
  S().deserialize(is, items.get(), h); // aka &data_[0]
644
653
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
645
-
654
+
646
655
  S().deserialize(is, &(items.get()[h + 1]), r);
647
656
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
648
657
 
@@ -650,7 +659,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
650
659
  throw std::runtime_error("error reading from std::istream");
651
660
 
652
661
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
653
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
662
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
654
663
  }
655
664
 
656
665
  template<typename T, typename S, typename A>
@@ -672,24 +681,24 @@ void var_opt_sketch<T,S,A>::reset() {
672
681
  // destroy everything
673
682
  const size_t num_to_destroy = std::min(k_ + 1, prev_alloc);
674
683
  for (size_t i = 0; i < num_to_destroy; ++i)
675
- A().destroy(data_ + i);
684
+ allocator_.destroy(data_ + i);
676
685
  } else {
677
686
  // skip gap or anything unused at the end
678
687
  for (size_t i = 0; i < h_; ++i)
679
- A().destroy(data_+ i);
688
+ allocator_.destroy(data_+ i);
680
689
 
681
690
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
682
- A().destroy(data_ + i);
691
+ allocator_.destroy(data_ + i);
683
692
  }
684
693
 
685
694
  if (curr_items_alloc_ < prev_alloc) {
686
695
  const bool is_gadget = (marks_ != nullptr);
687
696
 
688
- A().deallocate(data_, prev_alloc);
689
- AllocDouble().deallocate(weights_, prev_alloc);
697
+ allocator_.deallocate(data_, prev_alloc);
698
+ AllocDouble(allocator_).deallocate(weights_, prev_alloc);
690
699
 
691
700
  if (marks_ != nullptr)
692
- AllocBool().deallocate(marks_, prev_alloc);
701
+ AllocBool(allocator_).deallocate(marks_, prev_alloc);
693
702
 
694
703
  allocate_data_arrays(curr_items_alloc_, is_gadget);
695
704
  }
@@ -970,11 +979,11 @@ template<typename T, typename S, typename A>
970
979
  void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
971
980
  filled_data_ = false;
972
981
 
973
- data_ = A().allocate(tgt_size);
974
- weights_ = AllocDouble().allocate(tgt_size);
982
+ data_ = allocator_.allocate(tgt_size);
983
+ weights_ = AllocDouble(allocator_).allocate(tgt_size);
975
984
 
976
985
  if (use_marks) {
977
- marks_ = AllocBool().allocate(tgt_size);
986
+ marks_ = AllocBool(allocator_).allocate(tgt_size);
978
987
  } else {
979
988
  marks_ = nullptr;
980
989
  }
@@ -991,27 +1000,27 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
991
1000
  if (prev_size < curr_items_alloc_) {
992
1001
  filled_data_ = false;
993
1002
 
994
- T* tmp_data = A().allocate(curr_items_alloc_);
995
- double* tmp_weights = AllocDouble().allocate(curr_items_alloc_);
1003
+ T* tmp_data = allocator_.allocate(curr_items_alloc_);
1004
+ double* tmp_weights = AllocDouble(allocator_).allocate(curr_items_alloc_);
996
1005
 
997
1006
  for (uint32_t i = 0; i < prev_size; ++i) {
998
1007
  new (&tmp_data[i]) T(std::move(data_[i]));
999
- A().destroy(data_ + i);
1008
+ allocator_.destroy(data_ + i);
1000
1009
  tmp_weights[i] = weights_[i];
1001
1010
  }
1002
1011
 
1003
- A().deallocate(data_, prev_size);
1004
- AllocDouble().deallocate(weights_, prev_size);
1012
+ allocator_.deallocate(data_, prev_size);
1013
+ AllocDouble(allocator_).deallocate(weights_, prev_size);
1005
1014
 
1006
1015
  data_ = tmp_data;
1007
1016
  weights_ = tmp_weights;
1008
1017
 
1009
1018
  if (marks_ != nullptr) {
1010
- bool* tmp_marks = AllocBool().allocate(curr_items_alloc_);
1019
+ bool* tmp_marks = AllocBool(allocator_).allocate(curr_items_alloc_);
1011
1020
  for (uint32_t i = 0; i < prev_size; ++i) {
1012
1021
  tmp_marks[i] = marks_[i];
1013
1022
  }
1014
- AllocBool().deallocate(marks_, prev_size);
1023
+ AllocBool(allocator_).deallocate(marks_, prev_size);
1015
1024
  marks_ = tmp_marks;
1016
1025
  }
1017
1026
  }
@@ -1296,7 +1305,7 @@ template<typename T, typename S, typename A>
1296
1305
  void var_opt_sketch<T,S,A>::strip_marks() {
1297
1306
  if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
1298
1307
  num_marks_in_h_ = 0;
1299
- AllocBool().deallocate(marks_, curr_items_alloc_);
1308
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1300
1309
  marks_ = nullptr;
1301
1310
  }
1302
1311
 
@@ -1433,10 +1442,10 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1433
1442
  template<typename T, typename S, typename A>
1434
1443
  class var_opt_sketch<T, S, A>::items_deleter {
1435
1444
  public:
1436
- items_deleter(uint32_t num) : num(num), h_count(0), r_count(0) {}
1445
+ items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1437
1446
  void set_h(uint32_t h) { h_count = h; }
1438
1447
  void set_r(uint32_t r) { r_count = r; }
1439
- void operator() (T* ptr) const {
1448
+ void operator() (T* ptr) {
1440
1449
  if (h_count > 0) {
1441
1450
  for (size_t i = 0; i < h_count; ++i) {
1442
1451
  ptr[i].~T();
@@ -1449,39 +1458,42 @@ class var_opt_sketch<T, S, A>::items_deleter {
1449
1458
  }
1450
1459
  }
1451
1460
  if (ptr != nullptr) {
1452
- A().deallocate(ptr, num);
1461
+ allocator.deallocate(ptr, num);
1453
1462
  }
1454
1463
  }
1455
1464
  private:
1456
1465
  uint32_t num;
1457
1466
  uint32_t h_count;
1458
1467
  uint32_t r_count;
1468
+ A allocator;
1459
1469
  };
1460
1470
 
1461
1471
  template<typename T, typename S, typename A>
1462
1472
  class var_opt_sketch<T, S, A>::weights_deleter {
1463
1473
  public:
1464
- weights_deleter(uint32_t num) : num(num) {}
1465
- void operator() (double* ptr) const {
1474
+ weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1475
+ void operator() (double* ptr) {
1466
1476
  if (ptr != nullptr) {
1467
- AllocDouble().deallocate(ptr, num);
1477
+ allocator.deallocate(ptr, num);
1468
1478
  }
1469
1479
  }
1470
1480
  private:
1471
1481
  uint32_t num;
1482
+ AllocDouble allocator;
1472
1483
  };
1473
1484
 
1474
1485
  template<typename T, typename S, typename A>
1475
1486
  class var_opt_sketch<T, S, A>::marks_deleter {
1476
1487
  public:
1477
- marks_deleter(uint32_t num) : num(num) {}
1478
- void operator() (bool* ptr) const {
1488
+ marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1489
+ void operator() (bool* ptr) {
1479
1490
  if (ptr != nullptr) {
1480
- AllocBool().deallocate(ptr, 1);
1491
+ allocator.deallocate(ptr, 1);
1481
1492
  }
1482
1493
  }
1483
1494
  private:
1484
1495
  uint32_t num;
1496
+ AllocBool allocator;
1485
1497
  };
1486
1498
 
1487
1499