datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -40,15 +40,20 @@ namespace datasketches {
40
40
 
41
41
  enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES };
42
42
 
43
- // for serialization as raw bytes
44
- template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
45
- template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
46
-
47
43
  // type W for weight must be an arithmetic type (integral or floating point)
48
- template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename S = serde<T>, typename A = std::allocator<T>>
44
+ template<
45
+ typename T,
46
+ typename W = uint64_t,
47
+ typename H = std::hash<T>,
48
+ typename E = std::equal_to<T>,
49
+ typename S = serde<T>,
50
+ typename A = std::allocator<T>
51
+ >
49
52
  class frequent_items_sketch {
50
53
  public:
51
54
 
55
+ static const uint8_t LG_MIN_MAP_SIZE = 3;
56
+
52
57
  /**
53
58
  * Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
54
59
  *
@@ -59,7 +64,7 @@ public:
59
64
  * @param lg_start_map_size Log2 of the starting physical size of the internal hash
60
65
  * map managed by this sketch.
61
66
  */
62
- explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE);
67
+ explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
63
68
 
64
69
  /**
65
70
  * Update this sketch with an item and a positive weight (frequency count).
@@ -232,7 +237,8 @@ public:
232
237
 
233
238
  // This is a convenience alias for users
234
239
  // The type returned by the following serialize method
235
- typedef vector_u8<A> vector_bytes;
240
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
241
+
236
242
 
237
243
  /**
238
244
  * This method serializes the sketch as a vector of bytes.
@@ -249,7 +255,7 @@ public:
249
255
  * @param is input stream
250
256
  * @return an instance of the sketch
251
257
  */
252
- static frequent_items_sketch deserialize(std::istream& is);
258
+ static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
253
259
 
254
260
  /**
255
261
  * This method deserializes a sketch from a given array of bytes.
@@ -257,7 +263,7 @@ public:
257
263
  * @param size the size of the array
258
264
  * @return an instance of the sketch
259
265
  */
260
- static frequent_items_sketch deserialize(const void* bytes, size_t size);
266
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
261
267
 
262
268
  /**
263
269
  * Returns a human readable summary of this sketch
@@ -266,7 +272,6 @@ public:
266
272
  string<A> to_string(bool print_items = false) const;
267
273
 
268
274
  private:
269
- static const uint8_t LG_MIN_MAP_SIZE = 3;
270
275
  static const uint8_t SERIAL_VERSION = 1;
271
276
  static const uint8_t FAMILY_ID = 10;
272
277
  static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
@@ -33,10 +33,14 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
33
33
  const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
34
34
 
35
35
  template<typename T, typename W, typename H, typename E, typename S, typename A>
36
- frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
36
+ frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator):
37
37
  total_weight(0),
38
38
  offset(0),
39
- map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
39
+ map(
40
+ std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
41
+ std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
42
+ allocator
43
+ )
40
44
  {
41
45
  if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
42
46
  }
@@ -142,7 +146,7 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
142
146
  template<typename T, typename W, typename H, typename E, typename S, typename A>
143
147
  typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
144
148
  frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
145
- vector_row items;
149
+ vector_row items(map.get_allocator());
146
150
  for (auto &it: map) {
147
151
  const W lb = it.second;
148
152
  const W ub = it.second + offset;
@@ -182,19 +186,21 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
182
186
  os.write((char*)&offset, sizeof(offset));
183
187
 
184
188
  // copy active items and their weights to use batch serialization
185
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
186
- W* weights = AllocW().allocate(num_items);
187
- T* items = A().allocate(num_items);
189
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
190
+ AllocW aw(map.get_allocator());
191
+ W* weights = aw.allocate(num_items);
192
+ A alloc(map.get_allocator());
193
+ T* items = alloc.allocate(num_items);
188
194
  uint32_t i = 0;
189
195
  for (auto &it: map) {
190
196
  new (&items[i]) T(it.first);
191
197
  weights[i++] = it.second;
192
198
  }
193
199
  os.write((char*)weights, sizeof(W) * num_items);
194
- AllocW().deallocate(weights, num_items);
200
+ aw.deallocate(weights, num_items);
195
201
  S().serialize(os, items, num_items);
196
202
  for (unsigned i = 0; i < num_items; i++) items[i].~T();
197
- A().deallocate(items, num_items);
203
+ alloc.deallocate(items, num_items);
198
204
  }
199
205
  }
200
206
 
@@ -207,9 +213,9 @@ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() cons
207
213
  }
208
214
 
209
215
  template<typename T, typename W, typename H, typename E, typename S, typename A>
210
- vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
216
+ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
211
217
  const size_t size = header_size_bytes + get_serialized_size_bytes();
212
- vector_u8<A> bytes(size);
218
+ vector_bytes bytes(size, 0, map.get_allocator());
213
219
  uint8_t* ptr = bytes.data() + header_size_bytes;
214
220
  uint8_t* end_ptr = ptr + size;
215
221
 
@@ -238,20 +244,22 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
238
244
  ptr += copy_to_mem(&offset, ptr, sizeof(offset));
239
245
 
240
246
  // copy active items and their weights to use batch serialization
241
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
242
- W* weights = AllocW().allocate(num_items);
243
- T* items = A().allocate(num_items);
247
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
248
+ AllocW aw(map.get_allocator());
249
+ W* weights = aw.allocate(num_items);
250
+ A alloc(map.get_allocator());
251
+ T* items = alloc.allocate(num_items);
244
252
  uint32_t i = 0;
245
253
  for (auto &it: map) {
246
254
  new (&items[i]) T(it.first);
247
255
  weights[i++] = it.second;
248
256
  }
249
257
  ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
250
- AllocW().deallocate(weights, num_items);
258
+ aw.deallocate(weights, num_items);
251
259
  const size_t bytes_remaining = end_ptr - ptr;
252
260
  ptr += S().serialize(ptr, bytes_remaining, items, num_items);
253
261
  for (unsigned i = 0; i < num_items; i++) items[i].~T();
254
- A().deallocate(items, num_items);
262
+ alloc.deallocate(items, num_items);
255
263
  }
256
264
  return bytes;
257
265
  }
@@ -259,23 +267,25 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
259
267
  template<typename T, typename W, typename H, typename E, typename S, typename A>
260
268
  class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
261
269
  public:
262
- items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
270
+ items_deleter(uint32_t num, bool destroy, const A& allocator):
271
+ allocator(allocator), num(num), destroy(destroy) {}
263
272
  void set_destroy(bool destroy) { this->destroy = destroy; }
264
- void operator() (T* ptr) const {
273
+ void operator() (T* ptr) {
265
274
  if (ptr != nullptr) {
266
275
  if (destroy) {
267
276
  for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
268
277
  }
269
- A().deallocate(ptr, num);
278
+ allocator.deallocate(ptr, num);
270
279
  }
271
280
  }
272
281
  private:
282
+ A allocator;
273
283
  uint32_t num;
274
284
  bool destroy;
275
285
  };
276
286
 
277
287
  template<typename T, typename W, typename H, typename E, typename S, typename A>
278
- frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
288
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
279
289
  uint8_t preamble_longs;
280
290
  is.read((char*)&preamble_longs, sizeof(preamble_longs));
281
291
  uint8_t serial_version;
@@ -298,7 +308,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
298
308
  check_family_id(family_id);
299
309
  check_size(lg_cur_size, lg_max_size);
300
310
 
301
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
311
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
302
312
  if (!is_empty) {
303
313
  uint32_t num_items;
304
314
  is.read((char*)&num_items, sizeof(num_items));
@@ -310,10 +320,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
310
320
  is.read((char*)&offset, sizeof(offset));
311
321
 
312
322
  // batch deserialization with intermediate array of items and weights
313
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
314
- std::vector<W, AllocW> weights(num_items);
323
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
324
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
315
325
  is.read((char*)weights.data(), sizeof(W) * num_items);
316
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
326
+ A alloc(allocator);
327
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
317
328
  S().deserialize(is, items.get(), num_items);
318
329
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
319
330
  for (uint32_t i = 0; i < num_items; i++) {
@@ -328,7 +339,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
328
339
  }
329
340
 
330
341
  template<typename T, typename W, typename H, typename E, typename S, typename A>
331
- frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
342
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
332
343
  ensure_minimum_memory(size, 8);
333
344
  const char* ptr = static_cast<const char*>(bytes);
334
345
  const char* base = static_cast<const char*>(bytes);
@@ -355,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
355
366
  check_size(lg_cur_size, lg_max_size);
356
367
  ensure_minimum_memory(size, 1 << preamble_longs);
357
368
 
358
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
369
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
359
370
  if (!is_empty) {
360
371
  uint32_t num_items;
361
372
  ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
@@ -368,10 +379,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
368
379
 
369
380
  ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
370
381
  // batch deserialization with intermediate array of items and weights
371
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
372
- std::vector<W, AllocW> weights(num_items);
382
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
383
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
373
384
  ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
374
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
385
+ A alloc(allocator);
386
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
375
387
  const size_t bytes_remaining = size - (ptr - base);
376
388
  ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
377
389
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
@@ -39,33 +39,39 @@ public:
39
39
  using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
40
40
  using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
41
41
 
42
- reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
42
+ reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator);
43
43
  reverse_purge_hash_map(const reverse_purge_hash_map& other);
44
44
  reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
45
45
  ~reverse_purge_hash_map();
46
46
  reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
47
47
  reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
48
- V adjust_or_insert(const K& key, V value);
49
- V adjust_or_insert(K&& key, V value);
48
+
49
+ template<typename FwdK>
50
+ V adjust_or_insert(FwdK&& key, V value);
51
+
50
52
  V get(const K& key) const;
51
53
  uint8_t get_lg_cur_size() const;
52
54
  uint8_t get_lg_max_size() const;
53
55
  uint32_t get_capacity() const;
54
56
  uint32_t get_num_active() const;
57
+ const A& get_allocator() const;
58
+
55
59
  class iterator;
56
60
  iterator begin() const;
57
61
  iterator end() const;
62
+
58
63
  private:
59
64
  static constexpr double LOAD_FACTOR = 0.75;
60
65
  static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
61
66
  static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
62
67
 
63
- uint8_t lg_cur_size;
64
- uint8_t lg_max_size;
65
- uint32_t num_active;
66
- K* keys;
67
- V* values;
68
- uint16_t* states;
68
+ A allocator_;
69
+ uint8_t lg_cur_size_;
70
+ uint8_t lg_max_size_;
71
+ uint32_t num_active_;
72
+ K* keys_;
73
+ V* values_;
74
+ uint16_t* states_;
69
75
 
70
76
  inline bool is_active(uint32_t probe) const;
71
77
  void subtract_and_keep_positive_only(V amount);
@@ -83,8 +89,8 @@ public:
83
89
  friend class reverse_purge_hash_map<K, V, H, E, A>;
84
90
  iterator& operator++() {
85
91
  ++count;
86
- if (count < map->num_active) {
87
- const uint32_t mask = (1 << map->lg_cur_size) - 1;
92
+ if (count < map->num_active_) {
93
+ const uint32_t mask = (1 << map->lg_cur_size_) - 1;
88
94
  do {
89
95
  index = (index + stride) & mask;
90
96
  } while (!map->is_active(index));
@@ -95,7 +101,7 @@ public:
95
101
  bool operator==(const iterator& rhs) const { return count == rhs.count; }
96
102
  bool operator!=(const iterator& rhs) const { return count != rhs.count; }
97
103
  const std::pair<K&, V> operator*() const {
98
- return std::pair<K&, V>(map->keys[index], map->values[index]);
104
+ return std::pair<K&, V>(map->keys_[index], map->values_[index]);
99
105
  }
100
106
  private:
101
107
  static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
@@ -104,7 +110,7 @@ private:
104
110
  uint32_t count;
105
111
  uint32_t stride;
106
112
  iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
107
- map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
113
+ map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
108
114
  };
109
115
 
110
116
  } /* namespace datasketches */
@@ -34,113 +34,121 @@ template<typename K, typename V, typename H, typename E, typename A>
34
34
  constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
35
35
 
36
36
  template<typename K, typename V, typename H, typename E, typename A>
37
- reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
38
- lg_cur_size(lg_cur_size),
39
- lg_max_size(lg_max_size),
40
- num_active(0),
41
- keys(A().allocate(1 << lg_cur_size)),
42
- values(AllocV().allocate(1 << lg_cur_size)),
43
- states(AllocU16().allocate(1 << lg_cur_size))
37
+ reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator):
38
+ allocator_(allocator),
39
+ lg_cur_size_(lg_cur_size),
40
+ lg_max_size_(lg_max_size),
41
+ num_active_(0),
42
+ keys_(allocator_.allocate(1 << lg_cur_size)),
43
+ values_(nullptr),
44
+ states_(nullptr)
44
45
  {
45
- std::fill(states, &states[1 << lg_cur_size], 0);
46
+ AllocV av(allocator_);
47
+ values_ = av.allocate(1 << lg_cur_size);
48
+ AllocU16 au16(allocator_);
49
+ states_ = au16.allocate(1 << lg_cur_size);
50
+ std::fill(states_, states_ + (1 << lg_cur_size), 0);
46
51
  }
47
52
 
48
53
  template<typename K, typename V, typename H, typename E, typename A>
49
54
  reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
50
- lg_cur_size(other.lg_cur_size),
51
- lg_max_size(other.lg_max_size),
52
- num_active(other.num_active),
53
- keys(A().allocate(1 << lg_cur_size)),
54
- values(AllocV().allocate(1 << lg_cur_size)),
55
- states(AllocU16().allocate(1 << lg_cur_size))
55
+ allocator_(other.allocator_),
56
+ lg_cur_size_(other.lg_cur_size_),
57
+ lg_max_size_(other.lg_max_size_),
58
+ num_active_(other.num_active_),
59
+ keys_(allocator_.allocate(1 << lg_cur_size_)),
60
+ values_(nullptr),
61
+ states_(nullptr)
56
62
  {
57
- const uint32_t size = 1 << lg_cur_size;
58
- if (num_active > 0) {
59
- auto num = num_active;
63
+ AllocV av(allocator_);
64
+ values_ = av.allocate(1 << lg_cur_size_);
65
+ AllocU16 au16(allocator_);
66
+ states_ = au16.allocate(1 << lg_cur_size_);
67
+ const uint32_t size = 1 << lg_cur_size_;
68
+ if (num_active_ > 0) {
69
+ auto num = num_active_;
60
70
  for (uint32_t i = 0; i < size; i++) {
61
- if (other.states[i] > 0) {
62
- new (&keys[i]) K(other.keys[i]);
63
- values[i] = other.values[i];
71
+ if (other.states_[i] > 0) {
72
+ new (&keys_[i]) K(other.keys_[i]);
73
+ values_[i] = other.values_[i];
64
74
  }
65
75
  if (--num == 0) break;
66
76
  }
67
77
  }
68
- std::copy(&other.states[0], &other.states[size], states);
78
+ std::copy(other.states_, other.states_ + size, states_);
69
79
  }
70
80
 
71
81
  template<typename K, typename V, typename H, typename E, typename A>
72
82
  reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
73
- lg_cur_size(other.lg_cur_size),
74
- lg_max_size(other.lg_max_size),
75
- num_active(other.num_active),
76
- keys(nullptr),
77
- values(nullptr),
78
- states(nullptr)
83
+ allocator_(std::move(other.allocator_)),
84
+ lg_cur_size_(other.lg_cur_size_),
85
+ lg_max_size_(other.lg_max_size_),
86
+ num_active_(other.num_active_),
87
+ keys_(nullptr),
88
+ values_(nullptr),
89
+ states_(nullptr)
79
90
  {
80
- std::swap(keys, other.keys);
81
- std::swap(values, other.values);
82
- std::swap(states, other.states);
83
- other.num_active = 0;
91
+ std::swap(keys_, other.keys_);
92
+ std::swap(values_, other.values_);
93
+ std::swap(states_, other.states_);
94
+ other.num_active_ = 0;
84
95
  }
85
96
 
86
97
  template<typename K, typename V, typename H, typename E, typename A>
87
98
  reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
88
- const uint32_t size = 1 << lg_cur_size;
89
- if (num_active > 0) {
99
+ const uint32_t size = 1 << lg_cur_size_;
100
+ if (num_active_ > 0) {
90
101
  for (uint32_t i = 0; i < size; i++) {
91
102
  if (is_active(i)) {
92
- keys[i].~K();
93
- if (--num_active == 0) break;
103
+ keys_[i].~K();
104
+ if (--num_active_ == 0) break;
94
105
  }
95
106
  }
96
107
  }
97
- if (keys != nullptr)
98
- A().deallocate(keys, size);
99
- if (values != nullptr)
100
- AllocV().deallocate(values, size);
101
- if (states != nullptr)
102
- AllocU16().deallocate(states, size);
108
+ if (keys_ != nullptr) {
109
+ allocator_.deallocate(keys_, size);
110
+ }
111
+ if (values_ != nullptr) {
112
+ AllocV av(allocator_);
113
+ av.deallocate(values_, size);
114
+ }
115
+ if (states_ != nullptr) {
116
+ AllocU16 au16(allocator_);
117
+ au16.deallocate(states_, size);
118
+ }
103
119
  }
104
120
 
105
121
  template<typename K, typename V, typename H, typename E, typename A>
106
122
  reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
107
- std::swap(lg_cur_size, other.lg_cur_size);
108
- std::swap(lg_max_size, other.lg_max_size);
109
- std::swap(num_active, other.num_active);
110
- std::swap(keys, other.keys);
111
- std::swap(values, other.values);
112
- std::swap(states, other.states);
123
+ std::swap(allocator_, other.allocator_);
124
+ std::swap(lg_cur_size_, other.lg_cur_size_);
125
+ std::swap(lg_max_size_, other.lg_max_size_);
126
+ std::swap(num_active_, other.num_active_);
127
+ std::swap(keys_, other.keys_);
128
+ std::swap(values_, other.values_);
129
+ std::swap(states_, other.states_);
113
130
  return *this;
114
131
  }
115
132
 
116
133
  template<typename K, typename V, typename H, typename E, typename A>
117
134
  reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
118
- std::swap(lg_cur_size, other.lg_cur_size);
119
- std::swap(lg_max_size, other.lg_max_size);
120
- std::swap(num_active, other.num_active);
121
- std::swap(keys, other.keys);
122
- std::swap(values, other.values);
123
- std::swap(states, other.states);
135
+ std::swap(allocator_, other.allocator_);
136
+ std::swap(lg_cur_size_, other.lg_cur_size_);
137
+ std::swap(lg_max_size_, other.lg_max_size_);
138
+ std::swap(num_active_, other.num_active_);
139
+ std::swap(keys_, other.keys_);
140
+ std::swap(values_, other.values_);
141
+ std::swap(states_, other.states_);
124
142
  return *this;
125
143
  }
126
144
 
127
145
  template<typename K, typename V, typename H, typename E, typename A>
128
- V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) {
129
- const uint32_t num_active_before = num_active;
146
+ template<typename FwdK>
147
+ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
148
+ const uint32_t num_active_before = num_active_;
130
149
  const uint32_t index = internal_adjust_or_insert(key, value);
131
- if (num_active > num_active_before) {
132
- new (&keys[index]) K(key);
133
- return resize_or_purge_if_needed();
134
- }
135
- return 0;
136
- }
137
-
138
- template<typename K, typename V, typename H, typename E, typename A>
139
- V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
140
- const uint32_t num_active_before = num_active;
141
- const uint32_t index = internal_adjust_or_insert(key, value);
142
- if (num_active > num_active_before) {
143
- new (&keys[index]) K(std::move(key));
150
+ if (num_active_ > num_active_before) {
151
+ new (&keys_[index]) K(std::forward<FwdK>(key));
144
152
  return resize_or_purge_if_needed();
145
153
  }
146
154
  return 0;
@@ -148,10 +156,10 @@ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
148
156
 
149
157
  template<typename K, typename V, typename H, typename E, typename A>
150
158
  V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
151
- const uint32_t mask = (1 << lg_cur_size) - 1;
159
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
152
160
  uint32_t probe = fmix64(H()(key)) & mask;
153
161
  while (is_active(probe)) {
154
- if (E()(keys[probe], key)) return values[probe];
162
+ if (E()(keys_[probe], key)) return values_[probe];
155
163
  probe = (probe + 1) & mask;
156
164
  }
157
165
  return 0;
@@ -159,27 +167,32 @@ V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
159
167
 
160
168
  template<typename K, typename V, typename H, typename E, typename A>
161
169
  uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
162
- return lg_cur_size;
170
+ return lg_cur_size_;
163
171
  }
164
172
 
165
173
  template<typename K, typename V, typename H, typename E, typename A>
166
174
  uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
167
- return lg_max_size;
175
+ return lg_max_size_;
168
176
  }
169
177
 
170
178
  template<typename K, typename V, typename H, typename E, typename A>
171
179
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
172
- return (1 << lg_cur_size) * LOAD_FACTOR;
180
+ return (1 << lg_cur_size_) * LOAD_FACTOR;
173
181
  }
174
182
 
175
183
  template<typename K, typename V, typename H, typename E, typename A>
176
184
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
177
- return num_active;
185
+ return num_active_;
186
+ }
187
+
188
+ template<typename K, typename V, typename H, typename E, typename A>
189
+ const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const {
190
+ return allocator_;
178
191
  }
179
192
 
180
193
  template<typename K, typename V, typename H, typename E, typename A>
181
194
  typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
182
- const uint32_t size = 1 << lg_cur_size;
195
+ const uint32_t size = 1 << lg_cur_size_;
183
196
  uint32_t i = 0;
184
197
  while (i < size && !is_active(i)) i++;
185
198
  return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
@@ -187,40 +200,40 @@ typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<
187
200
 
188
201
  template<typename K, typename V, typename H, typename E, typename A>
189
202
  typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
190
- return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active);
203
+ return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_);
191
204
  }
192
205
 
193
206
  template<typename K, typename V, typename H, typename E, typename A>
194
207
  bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
195
- return states[index] > 0;
208
+ return states_[index] > 0;
196
209
  }
197
210
 
198
211
  template<typename K, typename V, typename H, typename E, typename A>
199
212
  void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
200
213
  // starting from the back, find the first empty cell,
201
214
  // which establishes the high end of a cluster.
202
- uint32_t first_probe = (1 << lg_cur_size) - 1;
215
+ uint32_t first_probe = (1 << lg_cur_size_) - 1;
203
216
  while (is_active(first_probe)) first_probe--;
204
217
  // when we find the next non-empty cell, we know we are at the high end of a cluster
205
218
  // work towards the front, delete any non-positive entries.
206
219
  for (uint32_t probe = first_probe; probe-- > 0;) {
207
220
  if (is_active(probe)) {
208
- if (values[probe] <= amount) {
221
+ if (values_[probe] <= amount) {
209
222
  hash_delete(probe); // does the work of deletion and moving higher items towards the front
210
- num_active--;
223
+ num_active_--;
211
224
  } else {
212
- values[probe] -= amount;
225
+ values_[probe] -= amount;
213
226
  }
214
227
  }
215
228
  }
216
229
  // now work on the first cluster that was skipped
217
- for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) {
230
+ for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
218
231
  if (is_active(probe)) {
219
- if (values[probe] <= amount) {
232
+ if (values_[probe] <= amount) {
220
233
  hash_delete(probe);
221
- num_active--;
234
+ num_active_--;
222
235
  } else {
223
- values[probe] -= amount;
236
+ values_[probe] -= amount;
224
237
  }
225
238
  }
226
239
  }
@@ -231,20 +244,20 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
231
244
  // Looks ahead in the table to search for another
232
245
  // item to move to this location
233
246
  // if none are found, the status is changed
234
- states[delete_index] = 0; // mark as empty
235
- keys[delete_index].~K();
247
+ states_[delete_index] = 0; // mark as empty
248
+ keys_[delete_index].~K();
236
249
  uint32_t drift = 1;
237
- const uint32_t mask = (1 << lg_cur_size) - 1;
250
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
238
251
  uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
239
252
  // advance until we find a free location replacing locations as needed
240
253
  while (is_active(probe)) {
241
- if (states[probe] > drift) {
254
+ if (states_[probe] > drift) {
242
255
  // move current element
243
- new (&keys[delete_index]) K(std::move(keys[probe]));
244
- values[delete_index] = values[probe];
245
- states[delete_index] = states[probe] - drift;
246
- states[probe] = 0; // mark as empty
247
- keys[probe].~K();
256
+ new (&keys_[delete_index]) K(std::move(keys_[probe]));
257
+ values_[delete_index] = values_[probe];
258
+ states_[delete_index] = states_[probe] - drift;
259
+ states_[probe] = 0; // mark as empty
260
+ keys_[probe].~K();
248
261
  drift = 0;
249
262
  delete_index = probe;
250
263
  }
@@ -257,13 +270,13 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
257
270
 
258
271
  template<typename K, typename V, typename H, typename E, typename A>
259
272
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
260
- const uint32_t mask = (1 << lg_cur_size) - 1;
273
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
261
274
  uint32_t index = fmix64(H()(key)) & mask;
262
275
  uint16_t drift = 1;
263
276
  while (is_active(index)) {
264
- if (E()(keys[index], key)) {
277
+ if (E()(keys_[index], key)) {
265
278
  // adjusting the value of an existing key
266
- values[index] += value;
279
+ values_[index] += value;
267
280
  return index;
268
281
  }
269
282
  index = (index + 1) & mask;
@@ -272,23 +285,23 @@ uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const
272
285
  if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
273
286
  }
274
287
  // adding the key and value to the table
275
- if (num_active > get_capacity()) {
276
- throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity()));
288
+ if (num_active_ > get_capacity()) {
289
+ throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity()));
277
290
  }
278
- values[index] = value;
279
- states[index] = drift;
280
- num_active++;
291
+ values_[index] = value;
292
+ states_[index] = drift;
293
+ num_active_++;
281
294
  return index;
282
295
  }
283
296
 
284
297
  template<typename K, typename V, typename H, typename E, typename A>
285
298
  V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
286
- if (num_active > get_capacity()) {
287
- if (lg_cur_size < lg_max_size) { // can grow
288
- resize(lg_cur_size + 1);
299
+ if (num_active_ > get_capacity()) {
300
+ if (lg_cur_size_ < lg_max_size_) { // can grow
301
+ resize(lg_cur_size_ + 1);
289
302
  } else { // at target size, must purge
290
303
  const V offset = purge();
291
- if (num_active > get_capacity()) {
304
+ if (num_active_ > get_capacity()) {
292
305
  throw std::logic_error("purge did not reduce number of active items");
293
306
  }
294
307
  return offset;
@@ -299,43 +312,46 @@ V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
299
312
 
300
313
  template<typename K, typename V, typename H, typename E, typename A>
301
314
  void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
302
- const uint32_t old_size = 1 << lg_cur_size;
303
- K* old_keys = keys;
304
- V* old_values = values;
305
- uint16_t* old_states = states;
315
+ const uint32_t old_size = 1 << lg_cur_size_;
316
+ K* old_keys = keys_;
317
+ V* old_values = values_;
318
+ uint16_t* old_states = states_;
306
319
  const uint32_t new_size = 1 << lg_new_size;
307
- keys = A().allocate(new_size);
308
- values = AllocV().allocate(new_size);
309
- states = AllocU16().allocate(new_size);
310
- std::fill(states, &states[new_size], 0);
311
- num_active = 0;
312
- lg_cur_size = lg_new_size;
320
+ keys_ = allocator_.allocate(new_size);
321
+ AllocV av(allocator_);
322
+ values_ = av.allocate(new_size);
323
+ AllocU16 au16(allocator_);
324
+ states_ = au16.allocate(new_size);
325
+ std::fill(states_, states_ + new_size, 0);
326
+ num_active_ = 0;
327
+ lg_cur_size_ = lg_new_size;
313
328
  for (uint32_t i = 0; i < old_size; i++) {
314
329
  if (old_states[i] > 0) {
315
330
  adjust_or_insert(std::move(old_keys[i]), old_values[i]);
316
331
  old_keys[i].~K();
317
332
  }
318
333
  }
319
- A().deallocate(old_keys, old_size);
320
- AllocV().deallocate(old_values, old_size);
321
- AllocU16().deallocate(old_states, old_size);
334
+ allocator_.deallocate(old_keys, old_size);
335
+ av.deallocate(old_values, old_size);
336
+ au16.deallocate(old_states, old_size);
322
337
  }
323
338
 
324
339
  template<typename K, typename V, typename H, typename E, typename A>
325
340
  V reverse_purge_hash_map<K, V, H, E, A>::purge() {
326
- const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active);
341
+ const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
327
342
  uint32_t num_samples = 0;
328
343
  uint32_t i = 0;
329
- V* samples = AllocV().allocate(limit);
344
+ AllocV av(allocator_);
345
+ V* samples = av.allocate(limit);
330
346
  while (num_samples < limit) {
331
347
  if (is_active(i)) {
332
- samples[num_samples++] = values[i];
348
+ samples[num_samples++] = values_[i];
333
349
  }
334
350
  i++;
335
351
  }
336
- std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
352
+ std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
337
353
  const V median = samples[num_samples / 2];
338
- AllocV().deallocate(samples, limit);
354
+ av.deallocate(samples, limit);
339
355
  subtract_and_keep_positive_only(median);
340
356
  return median;
341
357
  }