datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -42,8 +42,8 @@ namespace datasketches {
42
42
  * author Jon Malkin
43
43
  */
44
44
  template<typename T, typename S, typename A>
45
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf) :
46
- var_opt_sketch<T,S,A>(k, rf, false) {}
45
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
46
+ var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
47
47
 
48
48
  template<typename T, typename S, typename A>
49
49
  var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
@@ -56,12 +56,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
56
56
  rf_(other.rf_),
57
57
  curr_items_alloc_(other.curr_items_alloc_),
58
58
  filled_data_(other.filled_data_),
59
+ allocator_(other.allocator_),
59
60
  data_(nullptr),
60
61
  weights_(nullptr),
61
62
  num_marks_in_h_(other.num_marks_in_h_),
62
63
  marks_(nullptr)
63
64
  {
64
- data_ = A().allocate(curr_items_alloc_);
65
+ data_ = allocator_.allocate(curr_items_alloc_);
65
66
  // skip gap or anything unused at the end
66
67
  for (size_t i = 0; i < h_; ++i)
67
68
  new (&data_[i]) T(other.data_[i]);
@@ -71,13 +72,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
71
72
  // we skipped the gap
72
73
  filled_data_ = false;
73
74
 
74
- weights_ = AllocDouble().allocate(curr_items_alloc_);
75
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
75
76
  // doubles so can successfully copy regardless of the internal state
76
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
77
-
77
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
78
+
78
79
  if (other.marks_ != nullptr) {
79
- marks_ = AllocBool().allocate(curr_items_alloc_);
80
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
80
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
81
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
81
82
  }
82
83
  }
83
84
 
@@ -92,12 +93,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
92
93
  rf_(other.rf_),
93
94
  curr_items_alloc_(other.curr_items_alloc_),
94
95
  filled_data_(other.filled_data_),
96
+ allocator_(other.allocator_),
95
97
  data_(nullptr),
96
98
  weights_(nullptr),
97
99
  num_marks_in_h_(other.num_marks_in_h_),
98
100
  marks_(nullptr)
99
101
  {
100
- data_ = A().allocate(curr_items_alloc_);
102
+ data_ = allocator_.allocate(curr_items_alloc_);
101
103
  // skip gap or anything unused at the end
102
104
  for (size_t i = 0; i < h_; ++i)
103
105
  new (&data_[i]) T(other.data_[i]);
@@ -107,28 +109,29 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
107
109
  // we skipped the gap
108
110
  filled_data_ = false;
109
111
 
110
- weights_ = AllocDouble().allocate(curr_items_alloc_);
112
+ weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
111
113
  // doubles so can successfully copy regardless of the internal state
112
- std::copy(&other.weights_[0], &other.weights_[curr_items_alloc_], weights_);
114
+ std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
113
115
 
114
116
  if (!as_sketch && other.marks_ != nullptr) {
115
- marks_ = AllocBool().allocate(curr_items_alloc_);
116
- std::copy(&other.marks_[0], &other.marks_[curr_items_alloc_], marks_);
117
+ marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
118
+ std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
117
119
  }
118
120
  }
119
121
 
120
122
  template<typename T, typename S, typename A>
121
123
  var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
122
- uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r) :
124
+ uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
123
125
  k_(k),
124
126
  h_(h_count),
125
127
  m_(0),
126
128
  r_(r_count),
127
129
  n_(n),
128
130
  total_wt_r_(total_wt_r),
129
- rf_(DEFAULT_RESIZE_FACTOR),
131
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
130
132
  curr_items_alloc_(len),
131
133
  filled_data_(n > k),
134
+ allocator_(allocator),
132
135
  data_(data),
133
136
  weights_(weights),
134
137
  num_marks_in_h_(0),
@@ -146,6 +149,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
146
149
  rf_(other.rf_),
147
150
  curr_items_alloc_(other.curr_items_alloc_),
148
151
  filled_data_(other.filled_data_),
152
+ allocator_(other.allocator_),
149
153
  data_(other.data_),
150
154
  weights_(other.weights_),
151
155
  num_marks_in_h_(other.num_marks_in_h_),
@@ -157,8 +161,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
157
161
  }
158
162
 
159
163
  template<typename T, typename S, typename A>
160
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget) :
161
- k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf) {
164
+ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
165
+ k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
162
166
  if (k == 0 || k_ > MAX_K) {
163
167
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
164
168
  }
@@ -178,7 +182,7 @@ template<typename T, typename S, typename A>
178
182
  var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
179
183
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
180
184
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
181
- std::unique_ptr<bool, marks_deleter> marks) :
185
+ std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
182
186
  k_(k),
183
187
  h_(h),
184
188
  m_(m),
@@ -188,6 +192,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
188
192
  rf_(rf),
189
193
  curr_items_alloc_(curr_items_alloc),
190
194
  filled_data_(filled_data),
195
+ allocator_(allocator),
191
196
  data_(items.release()),
192
197
  weights_(weights.release()),
193
198
  num_marks_in_h_(num_marks_in_h),
@@ -202,27 +207,27 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
202
207
  // destroy everything
203
208
  const size_t num_to_destroy = std::min(k_ + 1, curr_items_alloc_);
204
209
  for (size_t i = 0; i < num_to_destroy; ++i) {
205
- A().destroy(data_ + i);
210
+ allocator_.destroy(data_ + i);
206
211
  }
207
212
  } else {
208
213
  // skip gap or anything unused at the end
209
214
  for (size_t i = 0; i < h_; ++i) {
210
- A().destroy(data_+ i);
215
+ allocator_.destroy(data_+ i);
211
216
  }
212
217
 
213
218
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i) {
214
- A().destroy(data_ + i);
219
+ allocator_.destroy(data_ + i);
215
220
  }
216
221
  }
217
- A().deallocate(data_, curr_items_alloc_);
222
+ allocator_.deallocate(data_, curr_items_alloc_);
218
223
  }
219
224
 
220
225
  if (weights_ != nullptr) {
221
- AllocDouble().deallocate(weights_, curr_items_alloc_);
226
+ AllocDouble(allocator_).deallocate(weights_, curr_items_alloc_);
222
227
  }
223
228
 
224
229
  if (marks_ != nullptr) {
225
- AllocBool().deallocate(marks_, curr_items_alloc_);
230
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
226
231
  }
227
232
  }
228
233
 
@@ -238,6 +243,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
238
243
  std::swap(rf_, sk_copy.rf_);
239
244
  std::swap(curr_items_alloc_, sk_copy.curr_items_alloc_);
240
245
  std::swap(filled_data_, sk_copy.filled_data_);
246
+ std::swap(allocator_, sk_copy.allocator_);
241
247
  std::swap(data_, sk_copy.data_);
242
248
  std::swap(weights_, sk_copy.weights_);
243
249
  std::swap(num_marks_in_h_, sk_copy.num_marks_in_h_);
@@ -256,6 +262,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
256
262
  std::swap(rf_, other.rf_);
257
263
  std::swap(curr_items_alloc_, other.curr_items_alloc_);
258
264
  std::swap(filled_data_, other.filled_data_);
265
+ std::swap(allocator_, other.allocator_);
259
266
  std::swap(data_, other.data_);
260
267
  std::swap(weights_, other.weights_);
261
268
  std::swap(num_marks_in_h_, other.num_marks_in_h_);
@@ -327,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
327
334
  num_bytes += (h_ / 8) + (h_ % 8 > 0);
328
335
  }
329
336
  // must iterate over the items
330
- for (auto& it: *this)
337
+ for (auto it: *this)
331
338
  num_bytes += S().size_of_item(it.first);
332
339
  return num_bytes;
333
340
  }
@@ -335,7 +342,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
335
342
  template<typename T, typename S, typename A>
336
343
  std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
337
344
  const size_t size = header_size_bytes + get_serialized_size_bytes();
338
- std::vector<uint8_t, AllocU8<A>> bytes(size);
345
+ std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
339
346
  uint8_t* ptr = bytes.data() + header_size_bytes;
340
347
  uint8_t* end_ptr = ptr + size;
341
348
 
@@ -352,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
352
359
  // first prelong
353
360
  uint8_t ser_ver(SER_VER);
354
361
  uint8_t family(FAMILY_ID);
355
- ptr += copy_to_mem(&first_byte, ptr, sizeof(uint8_t));
356
- ptr += copy_to_mem(&ser_ver, ptr, sizeof(uint8_t));
357
- ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
358
- ptr += copy_to_mem(&flags, ptr, sizeof(uint8_t));
359
- ptr += copy_to_mem(&k_, ptr, sizeof(uint32_t));
362
+ ptr += copy_to_mem(first_byte, ptr);
363
+ ptr += copy_to_mem(ser_ver, ptr);
364
+ ptr += copy_to_mem(family, ptr);
365
+ ptr += copy_to_mem(flags, ptr);
366
+ ptr += copy_to_mem(k_, ptr);
360
367
 
361
368
  if (!empty) {
362
369
  // second and third prelongs
363
- ptr += copy_to_mem(&n_, ptr, sizeof(uint64_t));
364
- ptr += copy_to_mem(&h_, ptr, sizeof(uint32_t));
365
- ptr += copy_to_mem(&r_, ptr, sizeof(uint32_t));
370
+ ptr += copy_to_mem(n_, ptr);
371
+ ptr += copy_to_mem(h_, ptr);
372
+ ptr += copy_to_mem(r_, ptr);
366
373
 
367
374
  // fourth prelong, if needed
368
375
  if (r_ > 0) {
369
- ptr += copy_to_mem(&total_wt_r_, ptr, sizeof(double));
376
+ ptr += copy_to_mem(total_wt_r_, ptr);
370
377
  }
371
378
 
372
379
  // first h_ weights
@@ -381,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
381
388
  }
382
389
 
383
390
  if ((i & 0x7) == 0x7) {
384
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
391
+ ptr += copy_to_mem(val, ptr);
385
392
  val = 0;
386
393
  }
387
394
  }
388
395
 
389
396
  // write out any remaining values
390
397
  if ((h_ & 0x7) > 0) {
391
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
398
+ ptr += copy_to_mem(val, ptr);
392
399
  }
393
400
  }
394
401
 
@@ -421,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
421
428
  // first prelong
422
429
  const uint8_t ser_ver(SER_VER);
423
430
  const uint8_t family(FAMILY_ID);
424
- os.write((char*)&first_byte, sizeof(uint8_t));
425
- os.write((char*)&ser_ver, sizeof(uint8_t));
426
- os.write((char*)&family, sizeof(uint8_t));
427
- os.write((char*)&flags, sizeof(uint8_t));
428
- os.write((char*)&k_, sizeof(uint32_t));
431
+ write(os, first_byte);
432
+ write(os, ser_ver);
433
+ write(os, family);
434
+ write(os, flags);
435
+ write(os, k_);
429
436
 
430
437
  if (!empty) {
431
438
  // second and third prelongs
432
- os.write((char*)&n_, sizeof(uint64_t));
433
- os.write((char*)&h_, sizeof(uint32_t));
434
- os.write((char*)&r_, sizeof(uint32_t));
439
+ write(os, n_);
440
+ write(os, h_);
441
+ write(os, r_);
435
442
 
436
443
  // fourth prelong, if needed
437
444
  if (r_ > 0) {
438
- os.write((char*)&total_wt_r_, sizeof(double));
445
+ write(os, total_wt_r_);
439
446
  }
440
447
 
441
448
  // write the first h_ weights
442
- os.write((char*)weights_, h_ * sizeof(double));
449
+ write(os, weights_, h_ * sizeof(double));
443
450
 
444
451
  // write the first h_ marks as packed bytes iff we have a gadget
445
452
  if (marks_ != nullptr) {
@@ -450,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
450
457
  }
451
458
 
452
459
  if ((i & 0x7) == 0x7) {
453
- os.write((char*)&val, sizeof(uint8_t));
460
+ write(os, val);
454
461
  val = 0;
455
462
  }
456
463
  }
457
464
 
458
465
  // write out any remaining values
459
466
  if ((h_ & 0x7) > 0) {
460
- os.write((char*)&val, sizeof(uint8_t));
467
+ write(os, val);
461
468
  }
462
469
  }
463
470
 
@@ -468,23 +475,23 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
468
475
  }
469
476
 
470
477
  template<typename T, typename S, typename A>
471
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size) {
478
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
472
479
  ensure_minimum_memory(size, 8);
473
480
  const char* ptr = static_cast<const char*>(bytes);
474
481
  const char* base = ptr;
475
482
  const char* end_ptr = ptr + size;
476
483
  uint8_t first_byte;
477
- ptr += copy_from_mem(ptr, &first_byte, sizeof(first_byte));
484
+ ptr += copy_from_mem(ptr, first_byte);
478
485
  uint8_t preamble_longs = first_byte & 0x3f;
479
486
  resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
480
487
  uint8_t serial_version;
481
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
488
+ ptr += copy_from_mem(ptr, serial_version);
482
489
  uint8_t family_id;
483
- ptr += copy_from_mem(ptr, &family_id, sizeof(family_id));
490
+ ptr += copy_from_mem(ptr, family_id);
484
491
  uint8_t flags;
485
- ptr += copy_from_mem(ptr, &flags, sizeof(flags));
492
+ ptr += copy_from_mem(ptr, flags);
486
493
  uint32_t k;
487
- ptr += copy_from_mem(ptr, &k, sizeof(k));
494
+ ptr += copy_from_mem(ptr, k);
488
495
 
489
496
  check_preamble_longs(preamble_longs, flags);
490
497
  check_family_and_serialization_version(family_id, serial_version);
@@ -494,22 +501,22 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
494
501
  const bool is_gadget = flags & GADGET_FLAG_MASK;
495
502
 
496
503
  if (is_empty) {
497
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
504
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
498
505
  }
499
506
 
500
507
  // second and third prelongs
501
508
  uint64_t n;
502
509
  uint32_t h, r;
503
- ptr += copy_from_mem(ptr, &n, sizeof(n));
504
- ptr += copy_from_mem(ptr, &h, sizeof(h));
505
- ptr += copy_from_mem(ptr, &r, sizeof(r));
510
+ ptr += copy_from_mem(ptr, n);
511
+ ptr += copy_from_mem(ptr, h);
512
+ ptr += copy_from_mem(ptr, r);
506
513
 
507
514
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
508
515
 
509
516
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
510
517
  double total_wt_r = 0.0;
511
518
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
512
- ptr += copy_from_mem(ptr, &total_wt_r, sizeof(total_wt_r));
519
+ ptr += copy_from_mem(ptr, total_wt_r);
513
520
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
514
521
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
515
522
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
@@ -520,7 +527,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
520
527
 
521
528
  // read the first h_ weights, fill in rest of array with -1.0
522
529
  check_memory_size(ptr - base + (h * sizeof(double)), size);
523
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
530
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
531
+ weights_deleter(array_size, allocator));
524
532
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
525
533
  ptr += copy_from_mem(ptr, wts, h * sizeof(double));
526
534
  for (size_t i = 0; i < h; ++i) {
@@ -528,19 +536,19 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
528
536
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
529
537
  }
530
538
  }
531
- std::fill(&wts[h], &wts[array_size], -1.0);
539
+ std::fill(wts + h, wts + array_size, -1.0);
532
540
 
533
541
  // read the first h_ marks as packed bytes iff we have a gadget
534
542
  uint32_t num_marks_in_h = 0;
535
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
543
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
536
544
  if (is_gadget) {
537
545
  uint8_t val = 0;
538
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
546
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
539
547
  const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
540
548
  check_memory_size(ptr - base + size_marks, size);
541
549
  for (uint32_t i = 0; i < h; ++i) {
542
550
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
543
- ptr += copy_from_mem(ptr, &val, sizeof(val));
551
+ ptr += copy_from_mem(ptr, val);
544
552
  }
545
553
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
546
554
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -548,8 +556,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
548
556
  }
549
557
 
550
558
  // read the sample items, skipping the gap. Either h_ or r_ may be 0
551
- items_deleter deleter(array_size);
552
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
559
+ items_deleter deleter(array_size, allocator);
560
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
553
561
 
554
562
  ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
555
563
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
@@ -558,23 +566,18 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
558
566
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
559
567
 
560
568
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
561
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
569
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
562
570
  }
563
571
 
564
572
  template<typename T, typename S, typename A>
565
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
566
- uint8_t first_byte;
567
- is.read((char*)&first_byte, sizeof(first_byte));
573
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
574
+ const auto first_byte = read<uint8_t>(is);
568
575
  uint8_t preamble_longs = first_byte & 0x3f;
569
- resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
570
- uint8_t serial_version;
571
- is.read((char*)&serial_version, sizeof(serial_version));
572
- uint8_t family_id;
573
- is.read((char*)&family_id, sizeof(family_id));
574
- uint8_t flags;
575
- is.read((char*)&flags, sizeof(flags));
576
- uint32_t k;
577
- is.read((char*)&k, sizeof(k));
576
+ const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
577
+ const auto serial_version = read<uint8_t>(is);
578
+ const auto family_id = read<uint8_t>(is);
579
+ const auto flags = read<uint8_t>(is);
580
+ const auto k = read<uint32_t>(is);
578
581
 
579
582
  check_preamble_longs(preamble_longs, flags);
580
583
  check_family_and_serialization_version(family_id, serial_version);
@@ -586,50 +589,47 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
586
589
  if (!is.good())
587
590
  throw std::runtime_error("error reading from std::istream");
588
591
  else
589
- return var_opt_sketch<T,S,A>(k, rf, is_gadget);
592
+ return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
590
593
  }
591
594
 
592
595
  // second and third prelongs
593
- uint64_t n;
594
- uint32_t h, r;
595
- is.read((char*)&n, sizeof(n));
596
- is.read((char*)&h, sizeof(h));
597
- is.read((char*)&r, sizeof(r));
596
+ const auto n = read<uint64_t>(is);
597
+ const auto h = read<uint32_t>(is);
598
+ const auto r = read<uint32_t>(is);
598
599
 
599
600
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
600
601
 
601
602
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
602
603
  double total_wt_r = 0.0;
603
604
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
604
- is.read((char*)&total_wt_r, sizeof(total_wt_r));
605
+ total_wt_r = read<double>(is);
605
606
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
606
607
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
607
608
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
608
609
  }
609
- } else {
610
- total_wt_r = 0.0;
611
610
  }
612
611
 
613
612
  // read the first h weights, fill remainder with -1.0
614
- std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size), weights_deleter(array_size));
613
+ std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
614
+ weights_deleter(array_size, allocator));
615
615
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
616
- is.read((char*)wts, h * sizeof(double));
616
+ read(is, wts, h * sizeof(double));
617
617
  for (size_t i = 0; i < h; ++i) {
618
618
  if (!(wts[i] > 0.0)) {
619
619
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
620
620
  }
621
621
  }
622
- std::fill(&wts[h], &wts[array_size], -1.0);
622
+ std::fill(wts + h, wts + array_size, -1.0);
623
623
 
624
624
  // read the first h_ marks as packed bytes iff we have a gadget
625
625
  uint32_t num_marks_in_h = 0;
626
- std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
626
+ std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
627
627
  if (is_gadget) {
628
- marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
628
+ marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
629
629
  uint8_t val = 0;
630
630
  for (uint32_t i = 0; i < h; ++i) {
631
631
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
632
- is.read((char*)&val, sizeof(val));
632
+ val = read<uint8_t>(is);
633
633
  }
634
634
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
635
635
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -637,12 +637,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
637
637
  }
638
638
 
639
639
  // read the sample items, skipping the gap. Either h or r may be 0
640
- items_deleter deleter(array_size);
641
- std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
642
-
640
+ items_deleter deleter(array_size, allocator);
641
+ std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
642
+
643
643
  S().deserialize(is, items.get(), h); // aka &data_[0]
644
644
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
645
-
645
+
646
646
  S().deserialize(is, &(items.get()[h + 1]), r);
647
647
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
648
648
 
@@ -650,7 +650,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
650
650
  throw std::runtime_error("error reading from std::istream");
651
651
 
652
652
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
653
- std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
653
+ std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
654
654
  }
655
655
 
656
656
  template<typename T, typename S, typename A>
@@ -672,24 +672,24 @@ void var_opt_sketch<T,S,A>::reset() {
672
672
  // destroy everything
673
673
  const size_t num_to_destroy = std::min(k_ + 1, prev_alloc);
674
674
  for (size_t i = 0; i < num_to_destroy; ++i)
675
- A().destroy(data_ + i);
675
+ allocator_.destroy(data_ + i);
676
676
  } else {
677
677
  // skip gap or anything unused at the end
678
678
  for (size_t i = 0; i < h_; ++i)
679
- A().destroy(data_+ i);
679
+ allocator_.destroy(data_+ i);
680
680
 
681
681
  for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
682
- A().destroy(data_ + i);
682
+ allocator_.destroy(data_ + i);
683
683
  }
684
684
 
685
685
  if (curr_items_alloc_ < prev_alloc) {
686
686
  const bool is_gadget = (marks_ != nullptr);
687
687
 
688
- A().deallocate(data_, prev_alloc);
689
- AllocDouble().deallocate(weights_, prev_alloc);
688
+ allocator_.deallocate(data_, prev_alloc);
689
+ AllocDouble(allocator_).deallocate(weights_, prev_alloc);
690
690
 
691
691
  if (marks_ != nullptr)
692
- AllocBool().deallocate(marks_, prev_alloc);
692
+ AllocBool(allocator_).deallocate(marks_, prev_alloc);
693
693
 
694
694
  allocate_data_arrays(curr_items_alloc_, is_gadget);
695
695
  }
@@ -970,11 +970,11 @@ template<typename T, typename S, typename A>
970
970
  void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
971
971
  filled_data_ = false;
972
972
 
973
- data_ = A().allocate(tgt_size);
974
- weights_ = AllocDouble().allocate(tgt_size);
973
+ data_ = allocator_.allocate(tgt_size);
974
+ weights_ = AllocDouble(allocator_).allocate(tgt_size);
975
975
 
976
976
  if (use_marks) {
977
- marks_ = AllocBool().allocate(tgt_size);
977
+ marks_ = AllocBool(allocator_).allocate(tgt_size);
978
978
  } else {
979
979
  marks_ = nullptr;
980
980
  }
@@ -991,27 +991,27 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
991
991
  if (prev_size < curr_items_alloc_) {
992
992
  filled_data_ = false;
993
993
 
994
- T* tmp_data = A().allocate(curr_items_alloc_);
995
- double* tmp_weights = AllocDouble().allocate(curr_items_alloc_);
994
+ T* tmp_data = allocator_.allocate(curr_items_alloc_);
995
+ double* tmp_weights = AllocDouble(allocator_).allocate(curr_items_alloc_);
996
996
 
997
997
  for (uint32_t i = 0; i < prev_size; ++i) {
998
998
  new (&tmp_data[i]) T(std::move(data_[i]));
999
- A().destroy(data_ + i);
999
+ allocator_.destroy(data_ + i);
1000
1000
  tmp_weights[i] = weights_[i];
1001
1001
  }
1002
1002
 
1003
- A().deallocate(data_, prev_size);
1004
- AllocDouble().deallocate(weights_, prev_size);
1003
+ allocator_.deallocate(data_, prev_size);
1004
+ AllocDouble(allocator_).deallocate(weights_, prev_size);
1005
1005
 
1006
1006
  data_ = tmp_data;
1007
1007
  weights_ = tmp_weights;
1008
1008
 
1009
1009
  if (marks_ != nullptr) {
1010
- bool* tmp_marks = AllocBool().allocate(curr_items_alloc_);
1010
+ bool* tmp_marks = AllocBool(allocator_).allocate(curr_items_alloc_);
1011
1011
  for (uint32_t i = 0; i < prev_size; ++i) {
1012
1012
  tmp_marks[i] = marks_[i];
1013
1013
  }
1014
- AllocBool().deallocate(marks_, prev_size);
1014
+ AllocBool(allocator_).deallocate(marks_, prev_size);
1015
1015
  marks_ = tmp_marks;
1016
1016
  }
1017
1017
  }
@@ -1296,7 +1296,7 @@ template<typename T, typename S, typename A>
1296
1296
  void var_opt_sketch<T,S,A>::strip_marks() {
1297
1297
  if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
1298
1298
  num_marks_in_h_ = 0;
1299
- AllocBool().deallocate(marks_, curr_items_alloc_);
1299
+ AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1300
1300
  marks_ = nullptr;
1301
1301
  }
1302
1302
 
@@ -1411,7 +1411,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1411
1411
  if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
1412
1412
  throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
1413
1413
 
1414
- size_t r_true_count = 0;
1414
+ uint32_t r_true_count = 0;
1415
1415
  ++idx; // skip the gap
1416
1416
  for (; idx < (k_ + 1); ++idx) {
1417
1417
  if (predicate(data_[idx])) {
@@ -1433,10 +1433,10 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1433
1433
  template<typename T, typename S, typename A>
1434
1434
  class var_opt_sketch<T, S, A>::items_deleter {
1435
1435
  public:
1436
- items_deleter(uint32_t num) : num(num), h_count(0), r_count(0) {}
1436
+ items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1437
1437
  void set_h(uint32_t h) { h_count = h; }
1438
1438
  void set_r(uint32_t r) { r_count = r; }
1439
- void operator() (T* ptr) const {
1439
+ void operator() (T* ptr) {
1440
1440
  if (h_count > 0) {
1441
1441
  for (size_t i = 0; i < h_count; ++i) {
1442
1442
  ptr[i].~T();
@@ -1449,39 +1449,42 @@ class var_opt_sketch<T, S, A>::items_deleter {
1449
1449
  }
1450
1450
  }
1451
1451
  if (ptr != nullptr) {
1452
- A().deallocate(ptr, num);
1452
+ allocator.deallocate(ptr, num);
1453
1453
  }
1454
1454
  }
1455
1455
  private:
1456
1456
  uint32_t num;
1457
1457
  uint32_t h_count;
1458
1458
  uint32_t r_count;
1459
+ A allocator;
1459
1460
  };
1460
1461
 
1461
1462
  template<typename T, typename S, typename A>
1462
1463
  class var_opt_sketch<T, S, A>::weights_deleter {
1463
1464
  public:
1464
- weights_deleter(uint32_t num) : num(num) {}
1465
- void operator() (double* ptr) const {
1465
+ weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1466
+ void operator() (double* ptr) {
1466
1467
  if (ptr != nullptr) {
1467
- AllocDouble().deallocate(ptr, num);
1468
+ allocator.deallocate(ptr, num);
1468
1469
  }
1469
1470
  }
1470
1471
  private:
1471
1472
  uint32_t num;
1473
+ AllocDouble allocator;
1472
1474
  };
1473
1475
 
1474
1476
  template<typename T, typename S, typename A>
1475
1477
  class var_opt_sketch<T, S, A>::marks_deleter {
1476
1478
  public:
1477
- marks_deleter(uint32_t num) : num(num) {}
1478
- void operator() (bool* ptr) const {
1479
+ marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1480
+ void operator() (bool* ptr) {
1479
1481
  if (ptr != nullptr) {
1480
- AllocBool().deallocate(ptr, 1);
1482
+ allocator.deallocate(ptr, 1);
1481
1483
  }
1482
1484
  }
1483
1485
  private:
1484
1486
  uint32_t num;
1487
+ AllocBool allocator;
1485
1488
  };
1486
1489
 
1487
1490