datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -40,15 +40,20 @@ namespace datasketches {
|
|
|
40
40
|
|
|
41
41
|
enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES };
|
|
42
42
|
|
|
43
|
-
// for serialization as raw bytes
|
|
44
|
-
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
|
|
45
|
-
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
|
|
46
|
-
|
|
47
43
|
// type W for weight must be an arithmetic type (integral or floating point)
|
|
48
|
-
template<
|
|
44
|
+
template<
|
|
45
|
+
typename T,
|
|
46
|
+
typename W = uint64_t,
|
|
47
|
+
typename H = std::hash<T>,
|
|
48
|
+
typename E = std::equal_to<T>,
|
|
49
|
+
typename S = serde<T>,
|
|
50
|
+
typename A = std::allocator<T>
|
|
51
|
+
>
|
|
49
52
|
class frequent_items_sketch {
|
|
50
53
|
public:
|
|
51
54
|
|
|
55
|
+
static const uint8_t LG_MIN_MAP_SIZE = 3;
|
|
56
|
+
|
|
52
57
|
/**
|
|
53
58
|
* Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
|
|
54
59
|
*
|
|
@@ -59,7 +64,7 @@ public:
|
|
|
59
64
|
* @param lg_start_map_size Log2 of the starting physical size of the internal hash
|
|
60
65
|
* map managed by this sketch.
|
|
61
66
|
*/
|
|
62
|
-
explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE);
|
|
67
|
+
explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
|
|
63
68
|
|
|
64
69
|
/**
|
|
65
70
|
* Update this sketch with an item and a positive weight (frequency count).
|
|
@@ -232,7 +237,8 @@ public:
|
|
|
232
237
|
|
|
233
238
|
// This is a convenience alias for users
|
|
234
239
|
// The type returned by the following serialize method
|
|
235
|
-
|
|
240
|
+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
|
|
241
|
+
|
|
236
242
|
|
|
237
243
|
/**
|
|
238
244
|
* This method serializes the sketch as a vector of bytes.
|
|
@@ -249,7 +255,7 @@ public:
|
|
|
249
255
|
* @param is input stream
|
|
250
256
|
* @return an instance of the sketch
|
|
251
257
|
*/
|
|
252
|
-
static frequent_items_sketch deserialize(std::istream& is);
|
|
258
|
+
static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
253
259
|
|
|
254
260
|
/**
|
|
255
261
|
* This method deserializes a sketch from a given array of bytes.
|
|
@@ -257,7 +263,7 @@ public:
|
|
|
257
263
|
* @param size the size of the array
|
|
258
264
|
* @return an instance of the sketch
|
|
259
265
|
*/
|
|
260
|
-
static frequent_items_sketch deserialize(const void* bytes, size_t size);
|
|
266
|
+
static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
261
267
|
|
|
262
268
|
/**
|
|
263
269
|
* Returns a human readable summary of this sketch
|
|
@@ -266,7 +272,6 @@ public:
|
|
|
266
272
|
string<A> to_string(bool print_items = false) const;
|
|
267
273
|
|
|
268
274
|
private:
|
|
269
|
-
static const uint8_t LG_MIN_MAP_SIZE = 3;
|
|
270
275
|
static const uint8_t SERIAL_VERSION = 1;
|
|
271
276
|
static const uint8_t FAMILY_ID = 10;
|
|
272
277
|
static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
|
|
@@ -33,10 +33,14 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
|
33
33
|
const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
|
|
34
34
|
|
|
35
35
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
36
|
-
frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
|
|
36
|
+
frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator):
|
|
37
37
|
total_weight(0),
|
|
38
38
|
offset(0),
|
|
39
|
-
map(
|
|
39
|
+
map(
|
|
40
|
+
std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
|
|
41
|
+
std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
|
|
42
|
+
allocator
|
|
43
|
+
)
|
|
40
44
|
{
|
|
41
45
|
if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
|
|
42
46
|
}
|
|
@@ -142,7 +146,7 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
|
|
|
142
146
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
143
147
|
typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
|
|
144
148
|
frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
|
|
145
|
-
vector_row items;
|
|
149
|
+
vector_row items(map.get_allocator());
|
|
146
150
|
for (auto &it: map) {
|
|
147
151
|
const W lb = it.second;
|
|
148
152
|
const W ub = it.second + offset;
|
|
@@ -182,19 +186,21 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
|
|
|
182
186
|
os.write((char*)&offset, sizeof(offset));
|
|
183
187
|
|
|
184
188
|
// copy active items and their weights to use batch serialization
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
189
|
+
using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
|
|
190
|
+
AllocW aw(map.get_allocator());
|
|
191
|
+
W* weights = aw.allocate(num_items);
|
|
192
|
+
A alloc(map.get_allocator());
|
|
193
|
+
T* items = alloc.allocate(num_items);
|
|
188
194
|
uint32_t i = 0;
|
|
189
195
|
for (auto &it: map) {
|
|
190
196
|
new (&items[i]) T(it.first);
|
|
191
197
|
weights[i++] = it.second;
|
|
192
198
|
}
|
|
193
199
|
os.write((char*)weights, sizeof(W) * num_items);
|
|
194
|
-
|
|
200
|
+
aw.deallocate(weights, num_items);
|
|
195
201
|
S().serialize(os, items, num_items);
|
|
196
202
|
for (unsigned i = 0; i < num_items; i++) items[i].~T();
|
|
197
|
-
|
|
203
|
+
alloc.deallocate(items, num_items);
|
|
198
204
|
}
|
|
199
205
|
}
|
|
200
206
|
|
|
@@ -207,9 +213,9 @@ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() cons
|
|
|
207
213
|
}
|
|
208
214
|
|
|
209
215
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
210
|
-
|
|
216
|
+
auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
|
|
211
217
|
const size_t size = header_size_bytes + get_serialized_size_bytes();
|
|
212
|
-
|
|
218
|
+
vector_bytes bytes(size, 0, map.get_allocator());
|
|
213
219
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
214
220
|
uint8_t* end_ptr = ptr + size;
|
|
215
221
|
|
|
@@ -238,20 +244,22 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
|
|
|
238
244
|
ptr += copy_to_mem(&offset, ptr, sizeof(offset));
|
|
239
245
|
|
|
240
246
|
// copy active items and their weights to use batch serialization
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
247
|
+
using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
|
|
248
|
+
AllocW aw(map.get_allocator());
|
|
249
|
+
W* weights = aw.allocate(num_items);
|
|
250
|
+
A alloc(map.get_allocator());
|
|
251
|
+
T* items = alloc.allocate(num_items);
|
|
244
252
|
uint32_t i = 0;
|
|
245
253
|
for (auto &it: map) {
|
|
246
254
|
new (&items[i]) T(it.first);
|
|
247
255
|
weights[i++] = it.second;
|
|
248
256
|
}
|
|
249
257
|
ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
|
|
250
|
-
|
|
258
|
+
aw.deallocate(weights, num_items);
|
|
251
259
|
const size_t bytes_remaining = end_ptr - ptr;
|
|
252
260
|
ptr += S().serialize(ptr, bytes_remaining, items, num_items);
|
|
253
261
|
for (unsigned i = 0; i < num_items; i++) items[i].~T();
|
|
254
|
-
|
|
262
|
+
alloc.deallocate(items, num_items);
|
|
255
263
|
}
|
|
256
264
|
return bytes;
|
|
257
265
|
}
|
|
@@ -259,23 +267,25 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
|
|
|
259
267
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
260
268
|
class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
|
|
261
269
|
public:
|
|
262
|
-
items_deleter(uint32_t num, bool destroy
|
|
270
|
+
items_deleter(uint32_t num, bool destroy, const A& allocator):
|
|
271
|
+
allocator(allocator), num(num), destroy(destroy) {}
|
|
263
272
|
void set_destroy(bool destroy) { this->destroy = destroy; }
|
|
264
|
-
void operator() (T* ptr)
|
|
273
|
+
void operator() (T* ptr) {
|
|
265
274
|
if (ptr != nullptr) {
|
|
266
275
|
if (destroy) {
|
|
267
276
|
for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
|
|
268
277
|
}
|
|
269
|
-
|
|
278
|
+
allocator.deallocate(ptr, num);
|
|
270
279
|
}
|
|
271
280
|
}
|
|
272
281
|
private:
|
|
282
|
+
A allocator;
|
|
273
283
|
uint32_t num;
|
|
274
284
|
bool destroy;
|
|
275
285
|
};
|
|
276
286
|
|
|
277
287
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
278
|
-
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
|
|
288
|
+
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
|
|
279
289
|
uint8_t preamble_longs;
|
|
280
290
|
is.read((char*)&preamble_longs, sizeof(preamble_longs));
|
|
281
291
|
uint8_t serial_version;
|
|
@@ -298,7 +308,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
298
308
|
check_family_id(family_id);
|
|
299
309
|
check_size(lg_cur_size, lg_max_size);
|
|
300
310
|
|
|
301
|
-
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
|
|
311
|
+
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
|
|
302
312
|
if (!is_empty) {
|
|
303
313
|
uint32_t num_items;
|
|
304
314
|
is.read((char*)&num_items, sizeof(num_items));
|
|
@@ -310,10 +320,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
310
320
|
is.read((char*)&offset, sizeof(offset));
|
|
311
321
|
|
|
312
322
|
// batch deserialization with intermediate array of items and weights
|
|
313
|
-
|
|
314
|
-
std::vector<W, AllocW> weights(num_items);
|
|
323
|
+
using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
|
|
324
|
+
std::vector<W, AllocW> weights(num_items, 0, allocator);
|
|
315
325
|
is.read((char*)weights.data(), sizeof(W) * num_items);
|
|
316
|
-
|
|
326
|
+
A alloc(allocator);
|
|
327
|
+
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
|
|
317
328
|
S().deserialize(is, items.get(), num_items);
|
|
318
329
|
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
|
|
319
330
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
@@ -328,7 +339,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
328
339
|
}
|
|
329
340
|
|
|
330
341
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
331
|
-
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
|
|
342
|
+
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
332
343
|
ensure_minimum_memory(size, 8);
|
|
333
344
|
const char* ptr = static_cast<const char*>(bytes);
|
|
334
345
|
const char* base = static_cast<const char*>(bytes);
|
|
@@ -355,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
355
366
|
check_size(lg_cur_size, lg_max_size);
|
|
356
367
|
ensure_minimum_memory(size, 1 << preamble_longs);
|
|
357
368
|
|
|
358
|
-
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
|
|
369
|
+
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
|
|
359
370
|
if (!is_empty) {
|
|
360
371
|
uint32_t num_items;
|
|
361
372
|
ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
|
|
@@ -368,10 +379,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
368
379
|
|
|
369
380
|
ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
|
|
370
381
|
// batch deserialization with intermediate array of items and weights
|
|
371
|
-
|
|
372
|
-
std::vector<W, AllocW> weights(num_items);
|
|
382
|
+
using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
|
|
383
|
+
std::vector<W, AllocW> weights(num_items, 0, allocator);
|
|
373
384
|
ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
|
|
374
|
-
|
|
385
|
+
A alloc(allocator);
|
|
386
|
+
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
|
|
375
387
|
const size_t bytes_remaining = size - (ptr - base);
|
|
376
388
|
ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
|
|
377
389
|
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
|
|
@@ -39,33 +39,39 @@ public:
|
|
|
39
39
|
using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
|
|
40
40
|
using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
|
|
41
41
|
|
|
42
|
-
reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
|
|
42
|
+
reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator);
|
|
43
43
|
reverse_purge_hash_map(const reverse_purge_hash_map& other);
|
|
44
44
|
reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
|
|
45
45
|
~reverse_purge_hash_map();
|
|
46
46
|
reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
|
|
47
47
|
reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
|
|
49
|
+
template<typename FwdK>
|
|
50
|
+
V adjust_or_insert(FwdK&& key, V value);
|
|
51
|
+
|
|
50
52
|
V get(const K& key) const;
|
|
51
53
|
uint8_t get_lg_cur_size() const;
|
|
52
54
|
uint8_t get_lg_max_size() const;
|
|
53
55
|
uint32_t get_capacity() const;
|
|
54
56
|
uint32_t get_num_active() const;
|
|
57
|
+
const A& get_allocator() const;
|
|
58
|
+
|
|
55
59
|
class iterator;
|
|
56
60
|
iterator begin() const;
|
|
57
61
|
iterator end() const;
|
|
62
|
+
|
|
58
63
|
private:
|
|
59
64
|
static constexpr double LOAD_FACTOR = 0.75;
|
|
60
65
|
static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
|
|
61
66
|
static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
|
|
62
67
|
|
|
63
|
-
|
|
64
|
-
uint8_t
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
68
|
+
A allocator_;
|
|
69
|
+
uint8_t lg_cur_size_;
|
|
70
|
+
uint8_t lg_max_size_;
|
|
71
|
+
uint32_t num_active_;
|
|
72
|
+
K* keys_;
|
|
73
|
+
V* values_;
|
|
74
|
+
uint16_t* states_;
|
|
69
75
|
|
|
70
76
|
inline bool is_active(uint32_t probe) const;
|
|
71
77
|
void subtract_and_keep_positive_only(V amount);
|
|
@@ -83,8 +89,8 @@ public:
|
|
|
83
89
|
friend class reverse_purge_hash_map<K, V, H, E, A>;
|
|
84
90
|
iterator& operator++() {
|
|
85
91
|
++count;
|
|
86
|
-
if (count < map->
|
|
87
|
-
const uint32_t mask = (1 << map->
|
|
92
|
+
if (count < map->num_active_) {
|
|
93
|
+
const uint32_t mask = (1 << map->lg_cur_size_) - 1;
|
|
88
94
|
do {
|
|
89
95
|
index = (index + stride) & mask;
|
|
90
96
|
} while (!map->is_active(index));
|
|
@@ -95,7 +101,7 @@ public:
|
|
|
95
101
|
bool operator==(const iterator& rhs) const { return count == rhs.count; }
|
|
96
102
|
bool operator!=(const iterator& rhs) const { return count != rhs.count; }
|
|
97
103
|
const std::pair<K&, V> operator*() const {
|
|
98
|
-
return std::pair<K&, V>(map->
|
|
104
|
+
return std::pair<K&, V>(map->keys_[index], map->values_[index]);
|
|
99
105
|
}
|
|
100
106
|
private:
|
|
101
107
|
static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
|
|
@@ -104,7 +110,7 @@ private:
|
|
|
104
110
|
uint32_t count;
|
|
105
111
|
uint32_t stride;
|
|
106
112
|
iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
|
|
107
|
-
map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->
|
|
113
|
+
map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
|
|
108
114
|
};
|
|
109
115
|
|
|
110
116
|
} /* namespace datasketches */
|
|
@@ -34,113 +34,121 @@ template<typename K, typename V, typename H, typename E, typename A>
|
|
|
34
34
|
constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
|
|
35
35
|
|
|
36
36
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
37
|
-
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator):
|
|
38
|
+
allocator_(allocator),
|
|
39
|
+
lg_cur_size_(lg_cur_size),
|
|
40
|
+
lg_max_size_(lg_max_size),
|
|
41
|
+
num_active_(0),
|
|
42
|
+
keys_(allocator_.allocate(1 << lg_cur_size)),
|
|
43
|
+
values_(nullptr),
|
|
44
|
+
states_(nullptr)
|
|
44
45
|
{
|
|
45
|
-
|
|
46
|
+
AllocV av(allocator_);
|
|
47
|
+
values_ = av.allocate(1 << lg_cur_size);
|
|
48
|
+
AllocU16 au16(allocator_);
|
|
49
|
+
states_ = au16.allocate(1 << lg_cur_size);
|
|
50
|
+
std::fill(states_, states_ + (1 << lg_cur_size), 0);
|
|
46
51
|
}
|
|
47
52
|
|
|
48
53
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
49
54
|
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
55
|
+
allocator_(other.allocator_),
|
|
56
|
+
lg_cur_size_(other.lg_cur_size_),
|
|
57
|
+
lg_max_size_(other.lg_max_size_),
|
|
58
|
+
num_active_(other.num_active_),
|
|
59
|
+
keys_(allocator_.allocate(1 << lg_cur_size_)),
|
|
60
|
+
values_(nullptr),
|
|
61
|
+
states_(nullptr)
|
|
56
62
|
{
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
63
|
+
AllocV av(allocator_);
|
|
64
|
+
values_ = av.allocate(1 << lg_cur_size_);
|
|
65
|
+
AllocU16 au16(allocator_);
|
|
66
|
+
states_ = au16.allocate(1 << lg_cur_size_);
|
|
67
|
+
const uint32_t size = 1 << lg_cur_size_;
|
|
68
|
+
if (num_active_ > 0) {
|
|
69
|
+
auto num = num_active_;
|
|
60
70
|
for (uint32_t i = 0; i < size; i++) {
|
|
61
|
-
if (other.
|
|
62
|
-
new (&
|
|
63
|
-
|
|
71
|
+
if (other.states_[i] > 0) {
|
|
72
|
+
new (&keys_[i]) K(other.keys_[i]);
|
|
73
|
+
values_[i] = other.values_[i];
|
|
64
74
|
}
|
|
65
75
|
if (--num == 0) break;
|
|
66
76
|
}
|
|
67
77
|
}
|
|
68
|
-
std::copy(
|
|
78
|
+
std::copy(other.states_, other.states_ + size, states_);
|
|
69
79
|
}
|
|
70
80
|
|
|
71
81
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
72
82
|
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
83
|
+
allocator_(std::move(other.allocator_)),
|
|
84
|
+
lg_cur_size_(other.lg_cur_size_),
|
|
85
|
+
lg_max_size_(other.lg_max_size_),
|
|
86
|
+
num_active_(other.num_active_),
|
|
87
|
+
keys_(nullptr),
|
|
88
|
+
values_(nullptr),
|
|
89
|
+
states_(nullptr)
|
|
79
90
|
{
|
|
80
|
-
std::swap(
|
|
81
|
-
std::swap(
|
|
82
|
-
std::swap(
|
|
83
|
-
other.
|
|
91
|
+
std::swap(keys_, other.keys_);
|
|
92
|
+
std::swap(values_, other.values_);
|
|
93
|
+
std::swap(states_, other.states_);
|
|
94
|
+
other.num_active_ = 0;
|
|
84
95
|
}
|
|
85
96
|
|
|
86
97
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
87
98
|
reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
|
|
88
|
-
const uint32_t size = 1 <<
|
|
89
|
-
if (
|
|
99
|
+
const uint32_t size = 1 << lg_cur_size_;
|
|
100
|
+
if (num_active_ > 0) {
|
|
90
101
|
for (uint32_t i = 0; i < size; i++) {
|
|
91
102
|
if (is_active(i)) {
|
|
92
|
-
|
|
93
|
-
if (--
|
|
103
|
+
keys_[i].~K();
|
|
104
|
+
if (--num_active_ == 0) break;
|
|
94
105
|
}
|
|
95
106
|
}
|
|
96
107
|
}
|
|
97
|
-
if (
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
108
|
+
if (keys_ != nullptr) {
|
|
109
|
+
allocator_.deallocate(keys_, size);
|
|
110
|
+
}
|
|
111
|
+
if (values_ != nullptr) {
|
|
112
|
+
AllocV av(allocator_);
|
|
113
|
+
av.deallocate(values_, size);
|
|
114
|
+
}
|
|
115
|
+
if (states_ != nullptr) {
|
|
116
|
+
AllocU16 au16(allocator_);
|
|
117
|
+
au16.deallocate(states_, size);
|
|
118
|
+
}
|
|
103
119
|
}
|
|
104
120
|
|
|
105
121
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
106
122
|
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
|
|
107
|
-
std::swap(
|
|
108
|
-
std::swap(
|
|
109
|
-
std::swap(
|
|
110
|
-
std::swap(
|
|
111
|
-
std::swap(
|
|
112
|
-
std::swap(
|
|
123
|
+
std::swap(allocator_, other.allocator_);
|
|
124
|
+
std::swap(lg_cur_size_, other.lg_cur_size_);
|
|
125
|
+
std::swap(lg_max_size_, other.lg_max_size_);
|
|
126
|
+
std::swap(num_active_, other.num_active_);
|
|
127
|
+
std::swap(keys_, other.keys_);
|
|
128
|
+
std::swap(values_, other.values_);
|
|
129
|
+
std::swap(states_, other.states_);
|
|
113
130
|
return *this;
|
|
114
131
|
}
|
|
115
132
|
|
|
116
133
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
117
134
|
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
|
|
118
|
-
std::swap(
|
|
119
|
-
std::swap(
|
|
120
|
-
std::swap(
|
|
121
|
-
std::swap(
|
|
122
|
-
std::swap(
|
|
123
|
-
std::swap(
|
|
135
|
+
std::swap(allocator_, other.allocator_);
|
|
136
|
+
std::swap(lg_cur_size_, other.lg_cur_size_);
|
|
137
|
+
std::swap(lg_max_size_, other.lg_max_size_);
|
|
138
|
+
std::swap(num_active_, other.num_active_);
|
|
139
|
+
std::swap(keys_, other.keys_);
|
|
140
|
+
std::swap(values_, other.values_);
|
|
141
|
+
std::swap(states_, other.states_);
|
|
124
142
|
return *this;
|
|
125
143
|
}
|
|
126
144
|
|
|
127
145
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
128
|
-
|
|
129
|
-
|
|
146
|
+
template<typename FwdK>
|
|
147
|
+
V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
|
|
148
|
+
const uint32_t num_active_before = num_active_;
|
|
130
149
|
const uint32_t index = internal_adjust_or_insert(key, value);
|
|
131
|
-
if (
|
|
132
|
-
new (&
|
|
133
|
-
return resize_or_purge_if_needed();
|
|
134
|
-
}
|
|
135
|
-
return 0;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
template<typename K, typename V, typename H, typename E, typename A>
|
|
139
|
-
V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
|
|
140
|
-
const uint32_t num_active_before = num_active;
|
|
141
|
-
const uint32_t index = internal_adjust_or_insert(key, value);
|
|
142
|
-
if (num_active > num_active_before) {
|
|
143
|
-
new (&keys[index]) K(std::move(key));
|
|
150
|
+
if (num_active_ > num_active_before) {
|
|
151
|
+
new (&keys_[index]) K(std::forward<FwdK>(key));
|
|
144
152
|
return resize_or_purge_if_needed();
|
|
145
153
|
}
|
|
146
154
|
return 0;
|
|
@@ -148,10 +156,10 @@ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
|
|
|
148
156
|
|
|
149
157
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
150
158
|
V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
|
|
151
|
-
const uint32_t mask = (1 <<
|
|
159
|
+
const uint32_t mask = (1 << lg_cur_size_) - 1;
|
|
152
160
|
uint32_t probe = fmix64(H()(key)) & mask;
|
|
153
161
|
while (is_active(probe)) {
|
|
154
|
-
if (E()(
|
|
162
|
+
if (E()(keys_[probe], key)) return values_[probe];
|
|
155
163
|
probe = (probe + 1) & mask;
|
|
156
164
|
}
|
|
157
165
|
return 0;
|
|
@@ -159,27 +167,32 @@ V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
|
|
|
159
167
|
|
|
160
168
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
161
169
|
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
|
|
162
|
-
return
|
|
170
|
+
return lg_cur_size_;
|
|
163
171
|
}
|
|
164
172
|
|
|
165
173
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
166
174
|
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
|
|
167
|
-
return
|
|
175
|
+
return lg_max_size_;
|
|
168
176
|
}
|
|
169
177
|
|
|
170
178
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
171
179
|
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
|
|
172
|
-
return (1 <<
|
|
180
|
+
return (1 << lg_cur_size_) * LOAD_FACTOR;
|
|
173
181
|
}
|
|
174
182
|
|
|
175
183
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
176
184
|
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
|
|
177
|
-
return
|
|
185
|
+
return num_active_;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
189
|
+
const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const {
|
|
190
|
+
return allocator_;
|
|
178
191
|
}
|
|
179
192
|
|
|
180
193
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
181
194
|
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
|
|
182
|
-
const uint32_t size = 1 <<
|
|
195
|
+
const uint32_t size = 1 << lg_cur_size_;
|
|
183
196
|
uint32_t i = 0;
|
|
184
197
|
while (i < size && !is_active(i)) i++;
|
|
185
198
|
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
|
|
@@ -187,40 +200,40 @@ typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<
|
|
|
187
200
|
|
|
188
201
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
189
202
|
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
|
|
190
|
-
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 <<
|
|
203
|
+
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_);
|
|
191
204
|
}
|
|
192
205
|
|
|
193
206
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
194
207
|
bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
|
|
195
|
-
return
|
|
208
|
+
return states_[index] > 0;
|
|
196
209
|
}
|
|
197
210
|
|
|
198
211
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
199
212
|
void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
|
|
200
213
|
// starting from the back, find the first empty cell,
|
|
201
214
|
// which establishes the high end of a cluster.
|
|
202
|
-
uint32_t first_probe = (1 <<
|
|
215
|
+
uint32_t first_probe = (1 << lg_cur_size_) - 1;
|
|
203
216
|
while (is_active(first_probe)) first_probe--;
|
|
204
217
|
// when we find the next non-empty cell, we know we are at the high end of a cluster
|
|
205
218
|
// work towards the front, delete any non-positive entries.
|
|
206
219
|
for (uint32_t probe = first_probe; probe-- > 0;) {
|
|
207
220
|
if (is_active(probe)) {
|
|
208
|
-
if (
|
|
221
|
+
if (values_[probe] <= amount) {
|
|
209
222
|
hash_delete(probe); // does the work of deletion and moving higher items towards the front
|
|
210
|
-
|
|
223
|
+
num_active_--;
|
|
211
224
|
} else {
|
|
212
|
-
|
|
225
|
+
values_[probe] -= amount;
|
|
213
226
|
}
|
|
214
227
|
}
|
|
215
228
|
}
|
|
216
229
|
// now work on the first cluster that was skipped
|
|
217
|
-
for (uint32_t probe = (1 <<
|
|
230
|
+
for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
|
|
218
231
|
if (is_active(probe)) {
|
|
219
|
-
if (
|
|
232
|
+
if (values_[probe] <= amount) {
|
|
220
233
|
hash_delete(probe);
|
|
221
|
-
|
|
234
|
+
num_active_--;
|
|
222
235
|
} else {
|
|
223
|
-
|
|
236
|
+
values_[probe] -= amount;
|
|
224
237
|
}
|
|
225
238
|
}
|
|
226
239
|
}
|
|
@@ -231,20 +244,20 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
|
|
|
231
244
|
// Looks ahead in the table to search for another
|
|
232
245
|
// item to move to this location
|
|
233
246
|
// if none are found, the status is changed
|
|
234
|
-
|
|
235
|
-
|
|
247
|
+
states_[delete_index] = 0; // mark as empty
|
|
248
|
+
keys_[delete_index].~K();
|
|
236
249
|
uint32_t drift = 1;
|
|
237
|
-
const uint32_t mask = (1 <<
|
|
250
|
+
const uint32_t mask = (1 << lg_cur_size_) - 1;
|
|
238
251
|
uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
|
|
239
252
|
// advance until we find a free location replacing locations as needed
|
|
240
253
|
while (is_active(probe)) {
|
|
241
|
-
if (
|
|
254
|
+
if (states_[probe] > drift) {
|
|
242
255
|
// move current element
|
|
243
|
-
new (&
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
256
|
+
new (&keys_[delete_index]) K(std::move(keys_[probe]));
|
|
257
|
+
values_[delete_index] = values_[probe];
|
|
258
|
+
states_[delete_index] = states_[probe] - drift;
|
|
259
|
+
states_[probe] = 0; // mark as empty
|
|
260
|
+
keys_[probe].~K();
|
|
248
261
|
drift = 0;
|
|
249
262
|
delete_index = probe;
|
|
250
263
|
}
|
|
@@ -257,13 +270,13 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
|
|
|
257
270
|
|
|
258
271
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
259
272
|
uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
|
|
260
|
-
const uint32_t mask = (1 <<
|
|
273
|
+
const uint32_t mask = (1 << lg_cur_size_) - 1;
|
|
261
274
|
uint32_t index = fmix64(H()(key)) & mask;
|
|
262
275
|
uint16_t drift = 1;
|
|
263
276
|
while (is_active(index)) {
|
|
264
|
-
if (E()(
|
|
277
|
+
if (E()(keys_[index], key)) {
|
|
265
278
|
// adjusting the value of an existing key
|
|
266
|
-
|
|
279
|
+
values_[index] += value;
|
|
267
280
|
return index;
|
|
268
281
|
}
|
|
269
282
|
index = (index + 1) & mask;
|
|
@@ -272,23 +285,23 @@ uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const
|
|
|
272
285
|
if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
|
|
273
286
|
}
|
|
274
287
|
// adding the key and value to the table
|
|
275
|
-
if (
|
|
276
|
-
throw std::logic_error("num_active " + std::to_string(
|
|
288
|
+
if (num_active_ > get_capacity()) {
|
|
289
|
+
throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity()));
|
|
277
290
|
}
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
291
|
+
values_[index] = value;
|
|
292
|
+
states_[index] = drift;
|
|
293
|
+
num_active_++;
|
|
281
294
|
return index;
|
|
282
295
|
}
|
|
283
296
|
|
|
284
297
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
285
298
|
V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
|
|
286
|
-
if (
|
|
287
|
-
if (
|
|
288
|
-
resize(
|
|
299
|
+
if (num_active_ > get_capacity()) {
|
|
300
|
+
if (lg_cur_size_ < lg_max_size_) { // can grow
|
|
301
|
+
resize(lg_cur_size_ + 1);
|
|
289
302
|
} else { // at target size, must purge
|
|
290
303
|
const V offset = purge();
|
|
291
|
-
if (
|
|
304
|
+
if (num_active_ > get_capacity()) {
|
|
292
305
|
throw std::logic_error("purge did not reduce number of active items");
|
|
293
306
|
}
|
|
294
307
|
return offset;
|
|
@@ -299,43 +312,46 @@ V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
|
|
|
299
312
|
|
|
300
313
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
301
314
|
void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
|
|
302
|
-
const uint32_t old_size = 1 <<
|
|
303
|
-
K* old_keys =
|
|
304
|
-
V* old_values =
|
|
305
|
-
uint16_t* old_states =
|
|
315
|
+
const uint32_t old_size = 1 << lg_cur_size_;
|
|
316
|
+
K* old_keys = keys_;
|
|
317
|
+
V* old_values = values_;
|
|
318
|
+
uint16_t* old_states = states_;
|
|
306
319
|
const uint32_t new_size = 1 << lg_new_size;
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
320
|
+
keys_ = allocator_.allocate(new_size);
|
|
321
|
+
AllocV av(allocator_);
|
|
322
|
+
values_ = av.allocate(new_size);
|
|
323
|
+
AllocU16 au16(allocator_);
|
|
324
|
+
states_ = au16.allocate(new_size);
|
|
325
|
+
std::fill(states_, states_ + new_size, 0);
|
|
326
|
+
num_active_ = 0;
|
|
327
|
+
lg_cur_size_ = lg_new_size;
|
|
313
328
|
for (uint32_t i = 0; i < old_size; i++) {
|
|
314
329
|
if (old_states[i] > 0) {
|
|
315
330
|
adjust_or_insert(std::move(old_keys[i]), old_values[i]);
|
|
316
331
|
old_keys[i].~K();
|
|
317
332
|
}
|
|
318
333
|
}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
334
|
+
allocator_.deallocate(old_keys, old_size);
|
|
335
|
+
av.deallocate(old_values, old_size);
|
|
336
|
+
au16.deallocate(old_states, old_size);
|
|
322
337
|
}
|
|
323
338
|
|
|
324
339
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
325
340
|
V reverse_purge_hash_map<K, V, H, E, A>::purge() {
|
|
326
|
-
const uint32_t limit = std::min(MAX_SAMPLE_SIZE,
|
|
341
|
+
const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
|
|
327
342
|
uint32_t num_samples = 0;
|
|
328
343
|
uint32_t i = 0;
|
|
329
|
-
|
|
344
|
+
AllocV av(allocator_);
|
|
345
|
+
V* samples = av.allocate(limit);
|
|
330
346
|
while (num_samples < limit) {
|
|
331
347
|
if (is_active(i)) {
|
|
332
|
-
samples[num_samples++] =
|
|
348
|
+
samples[num_samples++] = values_[i];
|
|
333
349
|
}
|
|
334
350
|
i++;
|
|
335
351
|
}
|
|
336
|
-
std::nth_element(
|
|
352
|
+
std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
|
|
337
353
|
const V median = samples[num_samples / 2];
|
|
338
|
-
|
|
354
|
+
av.deallocate(samples, limit);
|
|
339
355
|
subtract_and_keep_positive_only(median);
|
|
340
356
|
return median;
|
|
341
357
|
}
|