datasketches 0.1.2 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -40,15 +40,20 @@ namespace datasketches {
40
40
 
41
41
  enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES };
42
42
 
43
- // for serialization as raw bytes
44
- template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
45
- template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
46
-
47
43
  // type W for weight must be an arithmetic type (integral or floating point)
48
- template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename S = serde<T>, typename A = std::allocator<T>>
44
+ template<
45
+ typename T,
46
+ typename W = uint64_t,
47
+ typename H = std::hash<T>,
48
+ typename E = std::equal_to<T>,
49
+ typename S = serde<T>,
50
+ typename A = std::allocator<T>
51
+ >
49
52
  class frequent_items_sketch {
50
53
  public:
51
54
 
55
+ static const uint8_t LG_MIN_MAP_SIZE = 3;
56
+
52
57
  /**
53
58
  * Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
54
59
  *
@@ -59,7 +64,7 @@ public:
59
64
  * @param lg_start_map_size Log2 of the starting physical size of the internal hash
60
65
  * map managed by this sketch.
61
66
  */
62
- explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE);
67
+ explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
63
68
 
64
69
  /**
65
70
  * Update this sketch with an item and a positive weight (frequency count).
@@ -232,7 +237,8 @@ public:
232
237
 
233
238
  // This is a convenience alias for users
234
239
  // The type returned by the following serialize method
235
- typedef vector_u8<A> vector_bytes;
240
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
241
+
236
242
 
237
243
  /**
238
244
  * This method serializes the sketch as a vector of bytes.
@@ -249,7 +255,7 @@ public:
249
255
  * @param is input stream
250
256
  * @return an instance of the sketch
251
257
  */
252
- static frequent_items_sketch deserialize(std::istream& is);
258
+ static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
253
259
 
254
260
  /**
255
261
  * This method deserializes a sketch from a given array of bytes.
@@ -257,7 +263,7 @@ public:
257
263
  * @param size the size of the array
258
264
  * @return an instance of the sketch
259
265
  */
260
- static frequent_items_sketch deserialize(const void* bytes, size_t size);
266
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
261
267
 
262
268
  /**
263
269
  * Returns a human readable summary of this sketch
@@ -266,7 +272,6 @@ public:
266
272
  string<A> to_string(bool print_items = false) const;
267
273
 
268
274
  private:
269
- static const uint8_t LG_MIN_MAP_SIZE = 3;
270
275
  static const uint8_t SERIAL_VERSION = 1;
271
276
  static const uint8_t FAMILY_ID = 10;
272
277
  static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
@@ -33,10 +33,14 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
33
33
  const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
34
34
 
35
35
  template<typename T, typename W, typename H, typename E, typename S, typename A>
36
- frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
36
+ frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator):
37
37
  total_weight(0),
38
38
  offset(0),
39
- map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
39
+ map(
40
+ std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
41
+ std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
42
+ allocator
43
+ )
40
44
  {
41
45
  if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
42
46
  }
@@ -142,7 +146,7 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
142
146
  template<typename T, typename W, typename H, typename E, typename S, typename A>
143
147
  typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
144
148
  frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
145
- vector_row items;
149
+ vector_row items(map.get_allocator());
146
150
  for (auto &it: map) {
147
151
  const W lb = it.second;
148
152
  const W ub = it.second + offset;
@@ -182,19 +186,21 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
182
186
  os.write((char*)&offset, sizeof(offset));
183
187
 
184
188
  // copy active items and their weights to use batch serialization
185
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
186
- W* weights = AllocW().allocate(num_items);
187
- T* items = A().allocate(num_items);
189
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
190
+ AllocW aw(map.get_allocator());
191
+ W* weights = aw.allocate(num_items);
192
+ A alloc(map.get_allocator());
193
+ T* items = alloc.allocate(num_items);
188
194
  uint32_t i = 0;
189
195
  for (auto &it: map) {
190
196
  new (&items[i]) T(it.first);
191
197
  weights[i++] = it.second;
192
198
  }
193
199
  os.write((char*)weights, sizeof(W) * num_items);
194
- AllocW().deallocate(weights, num_items);
200
+ aw.deallocate(weights, num_items);
195
201
  S().serialize(os, items, num_items);
196
202
  for (unsigned i = 0; i < num_items; i++) items[i].~T();
197
- A().deallocate(items, num_items);
203
+ alloc.deallocate(items, num_items);
198
204
  }
199
205
  }
200
206
 
@@ -207,9 +213,9 @@ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() cons
207
213
  }
208
214
 
209
215
  template<typename T, typename W, typename H, typename E, typename S, typename A>
210
- vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
216
+ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
211
217
  const size_t size = header_size_bytes + get_serialized_size_bytes();
212
- vector_u8<A> bytes(size);
218
+ vector_bytes bytes(size, 0, map.get_allocator());
213
219
  uint8_t* ptr = bytes.data() + header_size_bytes;
214
220
  uint8_t* end_ptr = ptr + size;
215
221
 
@@ -238,20 +244,22 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
238
244
  ptr += copy_to_mem(&offset, ptr, sizeof(offset));
239
245
 
240
246
  // copy active items and their weights to use batch serialization
241
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
242
- W* weights = AllocW().allocate(num_items);
243
- T* items = A().allocate(num_items);
247
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
248
+ AllocW aw(map.get_allocator());
249
+ W* weights = aw.allocate(num_items);
250
+ A alloc(map.get_allocator());
251
+ T* items = alloc.allocate(num_items);
244
252
  uint32_t i = 0;
245
253
  for (auto &it: map) {
246
254
  new (&items[i]) T(it.first);
247
255
  weights[i++] = it.second;
248
256
  }
249
257
  ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
250
- AllocW().deallocate(weights, num_items);
258
+ aw.deallocate(weights, num_items);
251
259
  const size_t bytes_remaining = end_ptr - ptr;
252
260
  ptr += S().serialize(ptr, bytes_remaining, items, num_items);
253
261
  for (unsigned i = 0; i < num_items; i++) items[i].~T();
254
- A().deallocate(items, num_items);
262
+ alloc.deallocate(items, num_items);
255
263
  }
256
264
  return bytes;
257
265
  }
@@ -259,23 +267,25 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
259
267
  template<typename T, typename W, typename H, typename E, typename S, typename A>
260
268
  class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
261
269
  public:
262
- items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
270
+ items_deleter(uint32_t num, bool destroy, const A& allocator):
271
+ allocator(allocator), num(num), destroy(destroy) {}
263
272
  void set_destroy(bool destroy) { this->destroy = destroy; }
264
- void operator() (T* ptr) const {
273
+ void operator() (T* ptr) {
265
274
  if (ptr != nullptr) {
266
275
  if (destroy) {
267
276
  for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
268
277
  }
269
- A().deallocate(ptr, num);
278
+ allocator.deallocate(ptr, num);
270
279
  }
271
280
  }
272
281
  private:
282
+ A allocator;
273
283
  uint32_t num;
274
284
  bool destroy;
275
285
  };
276
286
 
277
287
  template<typename T, typename W, typename H, typename E, typename S, typename A>
278
- frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
288
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
279
289
  uint8_t preamble_longs;
280
290
  is.read((char*)&preamble_longs, sizeof(preamble_longs));
281
291
  uint8_t serial_version;
@@ -298,7 +308,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
298
308
  check_family_id(family_id);
299
309
  check_size(lg_cur_size, lg_max_size);
300
310
 
301
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
311
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
302
312
  if (!is_empty) {
303
313
  uint32_t num_items;
304
314
  is.read((char*)&num_items, sizeof(num_items));
@@ -310,10 +320,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
310
320
  is.read((char*)&offset, sizeof(offset));
311
321
 
312
322
  // batch deserialization with intermediate array of items and weights
313
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
314
- std::vector<W, AllocW> weights(num_items);
323
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
324
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
315
325
  is.read((char*)weights.data(), sizeof(W) * num_items);
316
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
326
+ A alloc(allocator);
327
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
317
328
  S().deserialize(is, items.get(), num_items);
318
329
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
319
330
  for (uint32_t i = 0; i < num_items; i++) {
@@ -328,7 +339,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
328
339
  }
329
340
 
330
341
  template<typename T, typename W, typename H, typename E, typename S, typename A>
331
- frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
342
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
332
343
  ensure_minimum_memory(size, 8);
333
344
  const char* ptr = static_cast<const char*>(bytes);
334
345
  const char* base = static_cast<const char*>(bytes);
@@ -355,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
355
366
  check_size(lg_cur_size, lg_max_size);
356
367
  ensure_minimum_memory(size, 1 << preamble_longs);
357
368
 
358
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
369
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
359
370
  if (!is_empty) {
360
371
  uint32_t num_items;
361
372
  ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
@@ -368,10 +379,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
368
379
 
369
380
  ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
370
381
  // batch deserialization with intermediate array of items and weights
371
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
372
- std::vector<W, AllocW> weights(num_items);
382
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
383
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
373
384
  ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
374
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
385
+ A alloc(allocator);
386
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
375
387
  const size_t bytes_remaining = size - (ptr - base);
376
388
  ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
377
389
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
@@ -39,33 +39,39 @@ public:
39
39
  using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
40
40
  using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
41
41
 
42
- reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
42
+ reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator);
43
43
  reverse_purge_hash_map(const reverse_purge_hash_map& other);
44
44
  reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
45
45
  ~reverse_purge_hash_map();
46
46
  reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
47
47
  reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
48
- V adjust_or_insert(const K& key, V value);
49
- V adjust_or_insert(K&& key, V value);
48
+
49
+ template<typename FwdK>
50
+ V adjust_or_insert(FwdK&& key, V value);
51
+
50
52
  V get(const K& key) const;
51
53
  uint8_t get_lg_cur_size() const;
52
54
  uint8_t get_lg_max_size() const;
53
55
  uint32_t get_capacity() const;
54
56
  uint32_t get_num_active() const;
57
+ const A& get_allocator() const;
58
+
55
59
  class iterator;
56
60
  iterator begin() const;
57
61
  iterator end() const;
62
+
58
63
  private:
59
64
  static constexpr double LOAD_FACTOR = 0.75;
60
65
  static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
61
66
  static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
62
67
 
63
- uint8_t lg_cur_size;
64
- uint8_t lg_max_size;
65
- uint32_t num_active;
66
- K* keys;
67
- V* values;
68
- uint16_t* states;
68
+ A allocator_;
69
+ uint8_t lg_cur_size_;
70
+ uint8_t lg_max_size_;
71
+ uint32_t num_active_;
72
+ K* keys_;
73
+ V* values_;
74
+ uint16_t* states_;
69
75
 
70
76
  inline bool is_active(uint32_t probe) const;
71
77
  void subtract_and_keep_positive_only(V amount);
@@ -83,8 +89,8 @@ public:
83
89
  friend class reverse_purge_hash_map<K, V, H, E, A>;
84
90
  iterator& operator++() {
85
91
  ++count;
86
- if (count < map->num_active) {
87
- const uint32_t mask = (1 << map->lg_cur_size) - 1;
92
+ if (count < map->num_active_) {
93
+ const uint32_t mask = (1 << map->lg_cur_size_) - 1;
88
94
  do {
89
95
  index = (index + stride) & mask;
90
96
  } while (!map->is_active(index));
@@ -95,7 +101,7 @@ public:
95
101
  bool operator==(const iterator& rhs) const { return count == rhs.count; }
96
102
  bool operator!=(const iterator& rhs) const { return count != rhs.count; }
97
103
  const std::pair<K&, V> operator*() const {
98
- return std::pair<K&, V>(map->keys[index], map->values[index]);
104
+ return std::pair<K&, V>(map->keys_[index], map->values_[index]);
99
105
  }
100
106
  private:
101
107
  static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
@@ -104,7 +110,7 @@ private:
104
110
  uint32_t count;
105
111
  uint32_t stride;
106
112
  iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
107
- map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
113
+ map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
108
114
  };
109
115
 
110
116
  } /* namespace datasketches */
@@ -34,113 +34,121 @@ template<typename K, typename V, typename H, typename E, typename A>
34
34
  constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
35
35
 
36
36
  template<typename K, typename V, typename H, typename E, typename A>
37
- reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
38
- lg_cur_size(lg_cur_size),
39
- lg_max_size(lg_max_size),
40
- num_active(0),
41
- keys(A().allocate(1 << lg_cur_size)),
42
- values(AllocV().allocate(1 << lg_cur_size)),
43
- states(AllocU16().allocate(1 << lg_cur_size))
37
+ reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator):
38
+ allocator_(allocator),
39
+ lg_cur_size_(lg_cur_size),
40
+ lg_max_size_(lg_max_size),
41
+ num_active_(0),
42
+ keys_(allocator_.allocate(1 << lg_cur_size)),
43
+ values_(nullptr),
44
+ states_(nullptr)
44
45
  {
45
- std::fill(states, &states[1 << lg_cur_size], 0);
46
+ AllocV av(allocator_);
47
+ values_ = av.allocate(1 << lg_cur_size);
48
+ AllocU16 au16(allocator_);
49
+ states_ = au16.allocate(1 << lg_cur_size);
50
+ std::fill(states_, states_ + (1 << lg_cur_size), 0);
46
51
  }
47
52
 
48
53
  template<typename K, typename V, typename H, typename E, typename A>
49
54
  reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
50
- lg_cur_size(other.lg_cur_size),
51
- lg_max_size(other.lg_max_size),
52
- num_active(other.num_active),
53
- keys(A().allocate(1 << lg_cur_size)),
54
- values(AllocV().allocate(1 << lg_cur_size)),
55
- states(AllocU16().allocate(1 << lg_cur_size))
55
+ allocator_(other.allocator_),
56
+ lg_cur_size_(other.lg_cur_size_),
57
+ lg_max_size_(other.lg_max_size_),
58
+ num_active_(other.num_active_),
59
+ keys_(allocator_.allocate(1 << lg_cur_size_)),
60
+ values_(nullptr),
61
+ states_(nullptr)
56
62
  {
57
- const uint32_t size = 1 << lg_cur_size;
58
- if (num_active > 0) {
59
- auto num = num_active;
63
+ AllocV av(allocator_);
64
+ values_ = av.allocate(1 << lg_cur_size_);
65
+ AllocU16 au16(allocator_);
66
+ states_ = au16.allocate(1 << lg_cur_size_);
67
+ const uint32_t size = 1 << lg_cur_size_;
68
+ if (num_active_ > 0) {
69
+ auto num = num_active_;
60
70
  for (uint32_t i = 0; i < size; i++) {
61
- if (other.states[i] > 0) {
62
- new (&keys[i]) K(other.keys[i]);
63
- values[i] = other.values[i];
71
+ if (other.states_[i] > 0) {
72
+ new (&keys_[i]) K(other.keys_[i]);
73
+ values_[i] = other.values_[i];
64
74
  }
65
75
  if (--num == 0) break;
66
76
  }
67
77
  }
68
- std::copy(&other.states[0], &other.states[size], states);
78
+ std::copy(other.states_, other.states_ + size, states_);
69
79
  }
70
80
 
71
81
  template<typename K, typename V, typename H, typename E, typename A>
72
82
  reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
73
- lg_cur_size(other.lg_cur_size),
74
- lg_max_size(other.lg_max_size),
75
- num_active(other.num_active),
76
- keys(nullptr),
77
- values(nullptr),
78
- states(nullptr)
83
+ allocator_(std::move(other.allocator_)),
84
+ lg_cur_size_(other.lg_cur_size_),
85
+ lg_max_size_(other.lg_max_size_),
86
+ num_active_(other.num_active_),
87
+ keys_(nullptr),
88
+ values_(nullptr),
89
+ states_(nullptr)
79
90
  {
80
- std::swap(keys, other.keys);
81
- std::swap(values, other.values);
82
- std::swap(states, other.states);
83
- other.num_active = 0;
91
+ std::swap(keys_, other.keys_);
92
+ std::swap(values_, other.values_);
93
+ std::swap(states_, other.states_);
94
+ other.num_active_ = 0;
84
95
  }
85
96
 
86
97
  template<typename K, typename V, typename H, typename E, typename A>
87
98
  reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
88
- const uint32_t size = 1 << lg_cur_size;
89
- if (num_active > 0) {
99
+ const uint32_t size = 1 << lg_cur_size_;
100
+ if (num_active_ > 0) {
90
101
  for (uint32_t i = 0; i < size; i++) {
91
102
  if (is_active(i)) {
92
- keys[i].~K();
93
- if (--num_active == 0) break;
103
+ keys_[i].~K();
104
+ if (--num_active_ == 0) break;
94
105
  }
95
106
  }
96
107
  }
97
- if (keys != nullptr)
98
- A().deallocate(keys, size);
99
- if (values != nullptr)
100
- AllocV().deallocate(values, size);
101
- if (states != nullptr)
102
- AllocU16().deallocate(states, size);
108
+ if (keys_ != nullptr) {
109
+ allocator_.deallocate(keys_, size);
110
+ }
111
+ if (values_ != nullptr) {
112
+ AllocV av(allocator_);
113
+ av.deallocate(values_, size);
114
+ }
115
+ if (states_ != nullptr) {
116
+ AllocU16 au16(allocator_);
117
+ au16.deallocate(states_, size);
118
+ }
103
119
  }
104
120
 
105
121
  template<typename K, typename V, typename H, typename E, typename A>
106
122
  reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
107
- std::swap(lg_cur_size, other.lg_cur_size);
108
- std::swap(lg_max_size, other.lg_max_size);
109
- std::swap(num_active, other.num_active);
110
- std::swap(keys, other.keys);
111
- std::swap(values, other.values);
112
- std::swap(states, other.states);
123
+ std::swap(allocator_, other.allocator_);
124
+ std::swap(lg_cur_size_, other.lg_cur_size_);
125
+ std::swap(lg_max_size_, other.lg_max_size_);
126
+ std::swap(num_active_, other.num_active_);
127
+ std::swap(keys_, other.keys_);
128
+ std::swap(values_, other.values_);
129
+ std::swap(states_, other.states_);
113
130
  return *this;
114
131
  }
115
132
 
116
133
  template<typename K, typename V, typename H, typename E, typename A>
117
134
  reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
118
- std::swap(lg_cur_size, other.lg_cur_size);
119
- std::swap(lg_max_size, other.lg_max_size);
120
- std::swap(num_active, other.num_active);
121
- std::swap(keys, other.keys);
122
- std::swap(values, other.values);
123
- std::swap(states, other.states);
135
+ std::swap(allocator_, other.allocator_);
136
+ std::swap(lg_cur_size_, other.lg_cur_size_);
137
+ std::swap(lg_max_size_, other.lg_max_size_);
138
+ std::swap(num_active_, other.num_active_);
139
+ std::swap(keys_, other.keys_);
140
+ std::swap(values_, other.values_);
141
+ std::swap(states_, other.states_);
124
142
  return *this;
125
143
  }
126
144
 
127
145
  template<typename K, typename V, typename H, typename E, typename A>
128
- V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) {
129
- const uint32_t num_active_before = num_active;
146
+ template<typename FwdK>
147
+ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
148
+ const uint32_t num_active_before = num_active_;
130
149
  const uint32_t index = internal_adjust_or_insert(key, value);
131
- if (num_active > num_active_before) {
132
- new (&keys[index]) K(key);
133
- return resize_or_purge_if_needed();
134
- }
135
- return 0;
136
- }
137
-
138
- template<typename K, typename V, typename H, typename E, typename A>
139
- V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
140
- const uint32_t num_active_before = num_active;
141
- const uint32_t index = internal_adjust_or_insert(key, value);
142
- if (num_active > num_active_before) {
143
- new (&keys[index]) K(std::move(key));
150
+ if (num_active_ > num_active_before) {
151
+ new (&keys_[index]) K(std::forward<FwdK>(key));
144
152
  return resize_or_purge_if_needed();
145
153
  }
146
154
  return 0;
@@ -148,10 +156,10 @@ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
148
156
 
149
157
  template<typename K, typename V, typename H, typename E, typename A>
150
158
  V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
151
- const uint32_t mask = (1 << lg_cur_size) - 1;
159
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
152
160
  uint32_t probe = fmix64(H()(key)) & mask;
153
161
  while (is_active(probe)) {
154
- if (E()(keys[probe], key)) return values[probe];
162
+ if (E()(keys_[probe], key)) return values_[probe];
155
163
  probe = (probe + 1) & mask;
156
164
  }
157
165
  return 0;
@@ -159,27 +167,32 @@ V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
159
167
 
160
168
  template<typename K, typename V, typename H, typename E, typename A>
161
169
  uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
162
- return lg_cur_size;
170
+ return lg_cur_size_;
163
171
  }
164
172
 
165
173
  template<typename K, typename V, typename H, typename E, typename A>
166
174
  uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
167
- return lg_max_size;
175
+ return lg_max_size_;
168
176
  }
169
177
 
170
178
  template<typename K, typename V, typename H, typename E, typename A>
171
179
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
172
- return (1 << lg_cur_size) * LOAD_FACTOR;
180
+ return (1 << lg_cur_size_) * LOAD_FACTOR;
173
181
  }
174
182
 
175
183
  template<typename K, typename V, typename H, typename E, typename A>
176
184
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
177
- return num_active;
185
+ return num_active_;
186
+ }
187
+
188
+ template<typename K, typename V, typename H, typename E, typename A>
189
+ const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const {
190
+ return allocator_;
178
191
  }
179
192
 
180
193
  template<typename K, typename V, typename H, typename E, typename A>
181
194
  typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
182
- const uint32_t size = 1 << lg_cur_size;
195
+ const uint32_t size = 1 << lg_cur_size_;
183
196
  uint32_t i = 0;
184
197
  while (i < size && !is_active(i)) i++;
185
198
  return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
@@ -187,40 +200,40 @@ typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<
187
200
 
188
201
  template<typename K, typename V, typename H, typename E, typename A>
189
202
  typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
190
- return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active);
203
+ return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_);
191
204
  }
192
205
 
193
206
  template<typename K, typename V, typename H, typename E, typename A>
194
207
  bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
195
- return states[index] > 0;
208
+ return states_[index] > 0;
196
209
  }
197
210
 
198
211
  template<typename K, typename V, typename H, typename E, typename A>
199
212
  void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
200
213
  // starting from the back, find the first empty cell,
201
214
  // which establishes the high end of a cluster.
202
- uint32_t first_probe = (1 << lg_cur_size) - 1;
215
+ uint32_t first_probe = (1 << lg_cur_size_) - 1;
203
216
  while (is_active(first_probe)) first_probe--;
204
217
  // when we find the next non-empty cell, we know we are at the high end of a cluster
205
218
  // work towards the front, delete any non-positive entries.
206
219
  for (uint32_t probe = first_probe; probe-- > 0;) {
207
220
  if (is_active(probe)) {
208
- if (values[probe] <= amount) {
221
+ if (values_[probe] <= amount) {
209
222
  hash_delete(probe); // does the work of deletion and moving higher items towards the front
210
- num_active--;
223
+ num_active_--;
211
224
  } else {
212
- values[probe] -= amount;
225
+ values_[probe] -= amount;
213
226
  }
214
227
  }
215
228
  }
216
229
  // now work on the first cluster that was skipped
217
- for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) {
230
+ for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
218
231
  if (is_active(probe)) {
219
- if (values[probe] <= amount) {
232
+ if (values_[probe] <= amount) {
220
233
  hash_delete(probe);
221
- num_active--;
234
+ num_active_--;
222
235
  } else {
223
- values[probe] -= amount;
236
+ values_[probe] -= amount;
224
237
  }
225
238
  }
226
239
  }
@@ -231,20 +244,20 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
231
244
  // Looks ahead in the table to search for another
232
245
  // item to move to this location
233
246
  // if none are found, the status is changed
234
- states[delete_index] = 0; // mark as empty
235
- keys[delete_index].~K();
247
+ states_[delete_index] = 0; // mark as empty
248
+ keys_[delete_index].~K();
236
249
  uint32_t drift = 1;
237
- const uint32_t mask = (1 << lg_cur_size) - 1;
250
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
238
251
  uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
239
252
  // advance until we find a free location replacing locations as needed
240
253
  while (is_active(probe)) {
241
- if (states[probe] > drift) {
254
+ if (states_[probe] > drift) {
242
255
  // move current element
243
- new (&keys[delete_index]) K(std::move(keys[probe]));
244
- values[delete_index] = values[probe];
245
- states[delete_index] = states[probe] - drift;
246
- states[probe] = 0; // mark as empty
247
- keys[probe].~K();
256
+ new (&keys_[delete_index]) K(std::move(keys_[probe]));
257
+ values_[delete_index] = values_[probe];
258
+ states_[delete_index] = states_[probe] - drift;
259
+ states_[probe] = 0; // mark as empty
260
+ keys_[probe].~K();
248
261
  drift = 0;
249
262
  delete_index = probe;
250
263
  }
@@ -257,13 +270,13 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
257
270
 
258
271
  template<typename K, typename V, typename H, typename E, typename A>
259
272
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
260
- const uint32_t mask = (1 << lg_cur_size) - 1;
273
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
261
274
  uint32_t index = fmix64(H()(key)) & mask;
262
275
  uint16_t drift = 1;
263
276
  while (is_active(index)) {
264
- if (E()(keys[index], key)) {
277
+ if (E()(keys_[index], key)) {
265
278
  // adjusting the value of an existing key
266
- values[index] += value;
279
+ values_[index] += value;
267
280
  return index;
268
281
  }
269
282
  index = (index + 1) & mask;
@@ -272,23 +285,23 @@ uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const
272
285
  if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
273
286
  }
274
287
  // adding the key and value to the table
275
- if (num_active > get_capacity()) {
276
- throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity()));
288
+ if (num_active_ > get_capacity()) {
289
+ throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity()));
277
290
  }
278
- values[index] = value;
279
- states[index] = drift;
280
- num_active++;
291
+ values_[index] = value;
292
+ states_[index] = drift;
293
+ num_active_++;
281
294
  return index;
282
295
  }
283
296
 
284
297
  template<typename K, typename V, typename H, typename E, typename A>
285
298
  V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
286
- if (num_active > get_capacity()) {
287
- if (lg_cur_size < lg_max_size) { // can grow
288
- resize(lg_cur_size + 1);
299
+ if (num_active_ > get_capacity()) {
300
+ if (lg_cur_size_ < lg_max_size_) { // can grow
301
+ resize(lg_cur_size_ + 1);
289
302
  } else { // at target size, must purge
290
303
  const V offset = purge();
291
- if (num_active > get_capacity()) {
304
+ if (num_active_ > get_capacity()) {
292
305
  throw std::logic_error("purge did not reduce number of active items");
293
306
  }
294
307
  return offset;
@@ -299,43 +312,46 @@ V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
299
312
 
300
313
  template<typename K, typename V, typename H, typename E, typename A>
301
314
  void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
302
- const uint32_t old_size = 1 << lg_cur_size;
303
- K* old_keys = keys;
304
- V* old_values = values;
305
- uint16_t* old_states = states;
315
+ const uint32_t old_size = 1 << lg_cur_size_;
316
+ K* old_keys = keys_;
317
+ V* old_values = values_;
318
+ uint16_t* old_states = states_;
306
319
  const uint32_t new_size = 1 << lg_new_size;
307
- keys = A().allocate(new_size);
308
- values = AllocV().allocate(new_size);
309
- states = AllocU16().allocate(new_size);
310
- std::fill(states, &states[new_size], 0);
311
- num_active = 0;
312
- lg_cur_size = lg_new_size;
320
+ keys_ = allocator_.allocate(new_size);
321
+ AllocV av(allocator_);
322
+ values_ = av.allocate(new_size);
323
+ AllocU16 au16(allocator_);
324
+ states_ = au16.allocate(new_size);
325
+ std::fill(states_, states_ + new_size, 0);
326
+ num_active_ = 0;
327
+ lg_cur_size_ = lg_new_size;
313
328
  for (uint32_t i = 0; i < old_size; i++) {
314
329
  if (old_states[i] > 0) {
315
330
  adjust_or_insert(std::move(old_keys[i]), old_values[i]);
316
331
  old_keys[i].~K();
317
332
  }
318
333
  }
319
- A().deallocate(old_keys, old_size);
320
- AllocV().deallocate(old_values, old_size);
321
- AllocU16().deallocate(old_states, old_size);
334
+ allocator_.deallocate(old_keys, old_size);
335
+ av.deallocate(old_values, old_size);
336
+ au16.deallocate(old_states, old_size);
322
337
  }
323
338
 
324
339
  template<typename K, typename V, typename H, typename E, typename A>
325
340
  V reverse_purge_hash_map<K, V, H, E, A>::purge() {
326
- const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active);
341
+ const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
327
342
  uint32_t num_samples = 0;
328
343
  uint32_t i = 0;
329
- V* samples = AllocV().allocate(limit);
344
+ AllocV av(allocator_);
345
+ V* samples = av.allocate(limit);
330
346
  while (num_samples < limit) {
331
347
  if (is_active(i)) {
332
- samples[num_samples++] = values[i];
348
+ samples[num_samples++] = values_[i];
333
349
  }
334
350
  i++;
335
351
  }
336
- std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
352
+ std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
337
353
  const V median = samples[num_samples / 2];
338
- AllocV().deallocate(samples, limit);
354
+ av.deallocate(samples, limit);
339
355
  subtract_and_keep_positive_only(median);
340
356
  return median;
341
357
  }