datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -62,7 +62,7 @@ class var_opt_sketch {
62
62
  static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
63
  static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
64
64
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR);
65
+ explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
66
66
  var_opt_sketch(const var_opt_sketch& other);
67
67
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
68
 
@@ -167,7 +167,7 @@ class var_opt_sketch {
167
167
  * @param is input stream
168
168
  * @return an instance of a sketch
169
169
  */
170
- static var_opt_sketch deserialize(std::istream& is);
170
+ static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
171
171
 
172
172
  /**
173
173
  * This method deserializes a sketch from a given array of bytes.
@@ -175,7 +175,7 @@ class var_opt_sketch {
175
175
  * @param size the size of the array
176
176
  * @return an instance of a sketch
177
177
  */
178
- static var_opt_sketch deserialize(const void* bytes, size_t size);
178
+ static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
179
179
 
180
180
  /**
181
181
  * Prints a summary of the sketch.
@@ -226,8 +226,9 @@ class var_opt_sketch {
226
226
  resize_factor rf_; // resize factor
227
227
 
228
228
  uint32_t curr_items_alloc_; // currently allocated array size
229
- bool filled_data_; // true if we've explciitly set all entries in data_
229
+ bool filled_data_; // true if we've explicitly set all entries in data_
230
230
 
231
+ A allocator_;
231
232
  T* data_; // stored sampled items
232
233
  double* weights_; // weights for sampled items
233
234
 
@@ -249,20 +250,20 @@ class var_opt_sketch {
249
250
  // occurs and is properly tracked.
250
251
  bool* marks_;
251
252
 
252
- // used during deserialization to avoid memork leaks upon errors
253
+ // used during deserialization to avoid memory leaks upon errors
253
254
  class items_deleter;
254
255
  class weights_deleter;
255
256
  class marks_deleter;
256
257
 
257
- var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget);
258
+ var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator);
258
259
  var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
259
260
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
260
261
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
261
- std::unique_ptr<bool, marks_deleter> marks);
262
+ std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
262
263
 
263
264
  friend class var_opt_union<T,S,A>;
264
265
  var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
265
- var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r);
266
+ var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator);
266
267
 
267
268
  string<A> items_to_string(bool print_gap) const;
268
269
 
@@ -353,7 +354,7 @@ private:
353
354
  double r_item_wt_;
354
355
  size_t idx_;
355
356
  const size_t final_idx_;
356
- bool weight_correction_;
357
+ // bool weight_correction_;
357
358
  };
358
359
 
359
360
  // non-const iterator for internal use
@@ -42,8 +42,8 @@ namespace datasketches {
42
42
  * author Jon Malkin
43
43
  */
44
44
  template<typename T, typename S, typename A>
45
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf) :
46
- var_opt_sketch<T,S,A>(k, rf, false) {}
45
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
46
+ var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
47
47
 
48
48
  template<typename T, typename S, typename A>
49
49
  var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
@@ -56,12 +56,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
56
56
  rf_(other.rf_),
57
57
  curr_items_alloc_(other.curr_items_alloc_),
58
58
  filled_data_(other.filled_data_),
59
+ allocator_(other.allocator_),
59
60
  data_(nullptr),
60
61
  weights_(nullptr),
61
62
  num_marks_in_h_(other.num_marks_in_h_),
62
63
  marks_(nullptr)
63
64
  {
64
- data_ = A().allocate(curr_items_alloc_);
65
+ data_ = allocator_.allocate(curr_items_alloc_);
65
66
  // skip gap or anything unused at the end
66
67
  for (size_t i = 0; i < h_; ++i)
67
68
  new (&data_[i]) T(other.data_[i]);
@@ -71,13 +72,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
71
72
  // we skipped the gap
72
73
  filled_data_ = false;
73
74
 
74
- weights_ = AllocDouble().allocate(curr_items_alloc_);
75
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
75
76
  // doubles so can successfully copy regardless of the internal state
76
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
77
-
77
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
78
+
78
79
  if (other.marks_ != nullptr) {
79
- marks_ = AllocBool().allocate(curr_items_alloc_);
80
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
80
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
81
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
81
82
  }
82
83
  }
83
84
 
@@ -92,12 +93,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
92
93
  rf_(other.rf_),
93
94
  curr_items_alloc_(other.curr_items_alloc_),
94
95
  filled_data_(other.filled_data_),
96
+ allocator_(other.allocator_),
95
97
  data_(nullptr),
96
98
  weights_(nullptr),
97
99
  num_marks_in_h_(other.num_marks_in_h_),
98
100
  marks_(nullptr)
99
101
  {
100
- data_ = A().allocate(curr_items_alloc_);
102
+ data_ = allocator_.allocate(curr_items_alloc_);
101
103
  // skip gap or anything unused at the end
102
104
  for (size_t i = 0; i < h_; ++i)
103
105
  new (&data_[i]) T(other.data_[i]);
@@ -107,19 +109,19 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
107
109
  // we skipped the gap
108
110
  filled_data_ = false;
109
111
 
110
- weights_ = AllocDouble().allocate(curr_items_alloc_);
112
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
111
113
  // doubles so can successfully copy regardless of the internal state
112
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
114
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
113
115
 
114
116
  if (!as_sketch && other.marks_ != nullptr) {
115
- marks_ = AllocBool().allocate(curr_items_alloc_);
116
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
117
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
118
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
117
119
  }
118
120
  }
119
121
 
120
122
  template<typename T, typename S, typename A>
121
123
  var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
122
- uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r) :
124
+ uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
123
125
  k_(k),
124
126
  h_(h_count),
125
127
  m_(0),
@@ -129,6 +131,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
129
131
  rf_(DEFAULT_RESIZE_FACTOR),
130
132
  curr_items_alloc_(len),
131
133
  filled_data_(n > k),
134
+ allocator_(allocator),
132
135
  data_(data),
133
136
  weights_(weights),
134
137
  num_marks_in_h_(0),
@@ -146,6 +149,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
146
149
  rf_(other.rf_),
147
150
  curr_items_alloc_(other.curr_items_alloc_),
148
151
  filled_data_(other.filled_data_),
152
+ allocator_(other.allocator_),
149
153
  data_(other.data_),
150
154
  weights_(other.weights_),
151
155
  num_marks_in_h_(other.num_marks_in_h_),
@@ -157,8 +161,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
157
161
  }
158
162
 
159
163
  template<typename T, typename S, typename A>
160
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget) :
161
- k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf) {
164
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
165
+ k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
162
166
  if (k == 0 || k_ > MAX_K) {
163
167
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
164
168
  }
@@ -178,7 +182,7 @@ template<typename T, typename S, typename A>
178
182
  var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
179
183
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
180
184
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
181
- std::unique_ptr<bool, marks_deleter> marks) :
185
+ std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
182
186
  k_(k),
183
187
  h_(h),
184
188
  m_(m),
@@ -188,6 +192,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
188
192
  rf_(rf),
189
193
  curr_items_alloc_(curr_items_alloc),
190
194
  filled_data_(filled_data),
195
+ allocator_(allocator),
191
196
  data_(items.release()),
192
197
  weights_(weights.release()),
193
198
  num_marks_in_h_(num_marks_in_h),
@@ -202,27 +207,27 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
202
207
  // destroy everything
203
208
  const size_t num_to_destroy = std::min(k_ + 1, curr_items_alloc_);
204
209
  for (size_t i = 0; i < num_to_destroy; ++i) {
205
- A().destroy(data_ + i);
210
+ allocator_.destroy(data_ + i);
206
211
  }
207
212
  } else {
208
213
  // skip gap or anything unused at the end
209
214
  for (size_t i = 0; i < h_; ++i) {
210
- A().destroy(data_+ i);
215
+ allocator_.destroy(data_+ i);
211
216
  }
212
217
 
213
218
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i) {
214
- A().destroy(data_ + i);
219
+ allocator_.destroy(data_ + i);
215
220
  }
216
221
  }
217
- A().deallocate(data_, curr_items_alloc_);
222
+ allocator_.deallocate(data_, curr_items_alloc_);
218
223
  }
219
224
 
220
225
  if (weights_ != nullptr) {
221
- AllocDouble().deallocate(weights_, curr_items_alloc_);
226
+ AllocDouble(allocator_).deallocate(weights_, curr_items_alloc_);
222
227
  }
223
228
 
224
229
  if (marks_ != nullptr) {
225
- AllocBool().deallocate(marks_, curr_items_alloc_);
230
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
226
231
  }
227
232
  }
228
233
 
@@ -238,6 +243,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
238
243
  std::swap(rf_, sk_copy.rf_);
239
244
  std::swap(curr_items_alloc_, sk_copy.curr_items_alloc_);
240
245
  std::swap(filled_data_, sk_copy.filled_data_);
246
+ std::swap(allocator_, sk_copy.allocator_);
241
247
  std::swap(data_, sk_copy.data_);
242
248
  std::swap(weights_, sk_copy.weights_);
243
249
  std::swap(num_marks_in_h_, sk_copy.num_marks_in_h_);
@@ -256,6 +262,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
256
262
  std::swap(rf_, other.rf_);
257
263
  std::swap(curr_items_alloc_, other.curr_items_alloc_);
258
264
  std::swap(filled_data_, other.filled_data_);
265
+ std::swap(allocator_, other.allocator_);
259
266
  std::swap(data_, other.data_);
260
267
  std::swap(weights_, other.weights_);
261
268
  std::swap(num_marks_in_h_, other.num_marks_in_h_);
@@ -335,7 +342,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
335
342
  template<typename T, typename S, typename A>
336
343
  std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
337
344
  const size_t size = header_size_bytes + get_serialized_size_bytes();
338
- std::vector<uint8_t, AllocU8<A>> bytes(size);
345
+ std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
339
346
  uint8_t* ptr = bytes.data() + header_size_bytes;
340
347
  uint8_t* end_ptr = ptr + size;
341
348
 
@@ -468,7 +475,7 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
468
475
  }
469
476
 
470
477
  template<typename T, typename S, typename A>
471
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size) {
478
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
472
479
  ensure_minimum_memory(size, 8);
473
480
  const char* ptr = static_cast<const char*>(bytes);
474
481
  const char* base = ptr;
@@ -494,7 +501,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
494
501
  const bool is_gadget = flags & GADGET_FLAG_MASK;
495
502
 
496
503
  if (is_empty) {
497
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
504
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
498
505
  }
499
506
 
500
507
  // second and third prelongs
@@ -520,7 +527,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
520
527
 
521
528
  // read the first h_ weights, fill in rest of array with -1.0
522
529
  check_memory_size(ptr - base + (h * sizeof(double)), size);
523
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
530
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
531
+ weights_deleter(array_size, allocator));
524
532
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
525
533
  ptr += copy_from_mem(ptr, wts, h * sizeof(double));
526
534
  for (size_t i = 0; i < h; ++i) {
@@ -528,14 +536,14 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
528
536
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
529
537
  }
530
538
  }
531
- std::fill(&wts[h], &wts[array_size], -1.0);
539
+ std::fill(wts + h, wts + array_size, -1.0);
532
540
 
533
541
  // read the first h_ marks as packed bytes iff we have a gadget
534
542
  uint32_t num_marks_in_h = 0;
535
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
543
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
536
544
  if (is_gadget) {
537
545
  uint8_t val = 0;
538
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
546
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
539
547
  const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
540
548
  check_memory_size(ptr - base + size_marks, size);
541
549
  for (uint32_t i = 0; i < h; ++i) {
@@ -548,8 +556,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
548
556
  }
549
557
 
550
558
  // read the sample items, skipping the gap. Either h_ or r_ may be 0
551
- items_deleter deleter(array_size);
552
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
559
+ items_deleter deleter(array_size, allocator);
560
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
553
561
 
554
562
  ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
555
563
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
@@ -558,11 +566,11 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
558
566
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
559
567
 
560
568
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
561
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
569
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
562
570
  }
563
571
 
564
572
  template<typename T, typename S, typename A>
565
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
573
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
566
574
  uint8_t first_byte;
567
575
  is.read((char*)&first_byte, sizeof(first_byte));
568
576
  uint8_t preamble_longs = first_byte & 0x3f;
@@ -586,7 +594,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
586
594
  if (!is.good())
587
595
  throw std::runtime_error("error reading from std::istream");
588
596
  else
589
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
597
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
590
598
  }
591
599
 
592
600
  // second and third prelongs
@@ -611,7 +619,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
611
619
  }
612
620
 
613
621
  // read the first h weights, fill remainder with -1.0
614
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
622
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
623
+ weights_deleter(array_size, allocator));
615
624
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
616
625
  is.read((char*)wts, h * sizeof(double));
617
626
  for (size_t i = 0; i < h; ++i) {
@@ -619,13 +628,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
619
628
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
620
629
  }
621
630
  }
622
- std::fill(&wts[h], &wts[array_size], -1.0);
631
+ std::fill(wts + h, wts + array_size, -1.0);
623
632
 
624
633
  // read the first h_ marks as packed bytes iff we have a gadget
625
634
  uint32_t num_marks_in_h = 0;
626
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
635
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
627
636
  if (is_gadget) {
628
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
637
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
629
638
  uint8_t val = 0;
630
639
  for (uint32_t i = 0; i < h; ++i) {
631
640
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
@@ -637,12 +646,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
637
646
  }
638
647
 
639
648
  // read the sample items, skipping the gap. Either h or r may be 0
640
- items_deleter deleter(array_size);
641
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
642
-
649
+ items_deleter deleter(array_size, allocator);
650
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
651
+
643
652
  S().deserialize(is, items.get(), h); // aka &data_[0]
644
653
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
645
-
654
+
646
655
  S().deserialize(is, &(items.get()[h + 1]), r);
647
656
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
648
657
 
@@ -650,7 +659,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
650
659
  throw std::runtime_error("error reading from std::istream");
651
660
 
652
661
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
653
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
662
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
654
663
  }
655
664
 
656
665
  template<typename T, typename S, typename A>
@@ -672,24 +681,24 @@ void var_opt_sketch<T,S,A>::reset() {
672
681
  // destroy everything
673
682
  const size_t num_to_destroy = std::min(k_ + 1, prev_alloc);
674
683
  for (size_t i = 0; i < num_to_destroy; ++i)
675
- A().destroy(data_ + i);
684
+ allocator_.destroy(data_ + i);
676
685
  } else {
677
686
  // skip gap or anything unused at the end
678
687
  for (size_t i = 0; i < h_; ++i)
679
- A().destroy(data_+ i);
688
+ allocator_.destroy(data_+ i);
680
689
 
681
690
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
682
- A().destroy(data_ + i);
691
+ allocator_.destroy(data_ + i);
683
692
  }
684
693
 
685
694
  if (curr_items_alloc_ < prev_alloc) {
686
695
  const bool is_gadget = (marks_ != nullptr);
687
696
 
688
- A().deallocate(data_, prev_alloc);
689
- AllocDouble().deallocate(weights_, prev_alloc);
697
+ allocator_.deallocate(data_, prev_alloc);
698
+ AllocDouble(allocator_).deallocate(weights_, prev_alloc);
690
699
 
691
700
  if (marks_ != nullptr)
692
- AllocBool().deallocate(marks_, prev_alloc);
701
+ AllocBool(allocator_).deallocate(marks_, prev_alloc);
693
702
 
694
703
  allocate_data_arrays(curr_items_alloc_, is_gadget);
695
704
  }
@@ -970,11 +979,11 @@ template<typename T, typename S, typename A>
970
979
  void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
971
980
  filled_data_ = false;
972
981
 
973
- data_ = A().allocate(tgt_size);
974
- weights_ = AllocDouble().allocate(tgt_size);
982
+ data_ = allocator_.allocate(tgt_size);
983
+ weights_ = AllocDouble(allocator_).allocate(tgt_size);
975
984
 
976
985
  if (use_marks) {
977
- marks_ = AllocBool().allocate(tgt_size);
986
+ marks_ = AllocBool(allocator_).allocate(tgt_size);
978
987
  } else {
979
988
  marks_ = nullptr;
980
989
  }
@@ -991,27 +1000,27 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
991
1000
  if (prev_size < curr_items_alloc_) {
992
1001
  filled_data_ = false;
993
1002
 
994
- T* tmp_data = A().allocate(curr_items_alloc_);
995
- double* tmp_weights = AllocDouble().allocate(curr_items_alloc_);
1003
+ T* tmp_data = allocator_.allocate(curr_items_alloc_);
1004
+ double* tmp_weights = AllocDouble(allocator_).allocate(curr_items_alloc_);
996
1005
 
997
1006
  for (uint32_t i = 0; i < prev_size; ++i) {
998
1007
  new (&tmp_data[i]) T(std::move(data_[i]));
999
- A().destroy(data_ + i);
1008
+ allocator_.destroy(data_ + i);
1000
1009
  tmp_weights[i] = weights_[i];
1001
1010
  }
1002
1011
 
1003
- A().deallocate(data_, prev_size);
1004
- AllocDouble().deallocate(weights_, prev_size);
1012
+ allocator_.deallocate(data_, prev_size);
1013
+ AllocDouble(allocator_).deallocate(weights_, prev_size);
1005
1014
 
1006
1015
  data_ = tmp_data;
1007
1016
  weights_ = tmp_weights;
1008
1017
 
1009
1018
  if (marks_ != nullptr) {
1010
- bool* tmp_marks = AllocBool().allocate(curr_items_alloc_);
1019
+ bool* tmp_marks = AllocBool(allocator_).allocate(curr_items_alloc_);
1011
1020
  for (uint32_t i = 0; i < prev_size; ++i) {
1012
1021
  tmp_marks[i] = marks_[i];
1013
1022
  }
1014
- AllocBool().deallocate(marks_, prev_size);
1023
+ AllocBool(allocator_).deallocate(marks_, prev_size);
1015
1024
  marks_ = tmp_marks;
1016
1025
  }
1017
1026
  }
@@ -1296,7 +1305,7 @@ template<typename T, typename S, typename A>
1296
1305
  void var_opt_sketch<T,S,A>::strip_marks() {
1297
1306
  if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
1298
1307
  num_marks_in_h_ = 0;
1299
- AllocBool().deallocate(marks_, curr_items_alloc_);
1308
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1300
1309
  marks_ = nullptr;
1301
1310
  }
1302
1311
 
@@ -1433,10 +1442,10 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1433
1442
  template<typename T, typename S, typename A>
1434
1443
  class var_opt_sketch<T, S, A>::items_deleter {
1435
1444
  public:
1436
- items_deleter(uint32_t num) : num(num), h_count(0), r_count(0) {}
1445
+ items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1437
1446
  void set_h(uint32_t h) { h_count = h; }
1438
1447
  void set_r(uint32_t r) { r_count = r; }
1439
- void operator() (T* ptr) const {
1448
+ void operator() (T* ptr) {
1440
1449
  if (h_count > 0) {
1441
1450
  for (size_t i = 0; i < h_count; ++i) {
1442
1451
  ptr[i].~T();
@@ -1449,39 +1458,42 @@ class var_opt_sketch<T, S, A>::items_deleter {
1449
1458
  }
1450
1459
  }
1451
1460
  if (ptr != nullptr) {
1452
- A().deallocate(ptr, num);
1461
+ allocator.deallocate(ptr, num);
1453
1462
  }
1454
1463
  }
1455
1464
  private:
1456
1465
  uint32_t num;
1457
1466
  uint32_t h_count;
1458
1467
  uint32_t r_count;
1468
+ A allocator;
1459
1469
  };
1460
1470
 
1461
1471
  template<typename T, typename S, typename A>
1462
1472
  class var_opt_sketch<T, S, A>::weights_deleter {
1463
1473
  public:
1464
- weights_deleter(uint32_t num) : num(num) {}
1465
- void operator() (double* ptr) const {
1474
+ weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1475
+ void operator() (double* ptr) {
1466
1476
  if (ptr != nullptr) {
1467
- AllocDouble().deallocate(ptr, num);
1477
+ allocator.deallocate(ptr, num);
1468
1478
  }
1469
1479
  }
1470
1480
  private:
1471
1481
  uint32_t num;
1482
+ AllocDouble allocator;
1472
1483
  };
1473
1484
 
1474
1485
  template<typename T, typename S, typename A>
1475
1486
  class var_opt_sketch<T, S, A>::marks_deleter {
1476
1487
  public:
1477
- marks_deleter(uint32_t num) : num(num) {}
1478
- void operator() (bool* ptr) const {
1488
+ marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1489
+ void operator() (bool* ptr) {
1479
1490
  if (ptr != nullptr) {
1480
- AllocBool().deallocate(ptr, 1);
1491
+ allocator.deallocate(ptr, 1);
1481
1492
  }
1482
1493
  }
1483
1494
  private:
1484
1495
  uint32_t num;
1496
+ AllocBool allocator;
1485
1497
  };
1486
1498
 
1487
1499