datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef REVERSE_PURGE_HASH_MAP_HPP_
|
|
21
|
+
#define REVERSE_PURGE_HASH_MAP_HPP_
|
|
22
|
+
|
|
23
|
+
#include <memory>
|
|
24
|
+
#include <iterator>
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
/*
 * Specialized linear-probing hash map with a reverse purge operation:
 * a purge subtracts an amount from every stored value and removes the
 * entries whose value does not stay positive
 * (see subtract_and_keep_positive_only in the implementation file).
 *
 * Storage consists of three parallel arrays (keys, values, states), each
 * holding 2^lg_cur_size slots. A slot is active when its state is non-zero.
 *
 * Template parameters:
 *   K - key type
 *   V - value type
 *   H - hash functor applied to keys
 *   E - equality functor applied to keys
 *   A - allocator for keys; rebound to allocate the value and state arrays
 *
 * Based on Java implementation here:
 * https://github.com/DataSketches/sketches-core/blob/master/src/main/java/com/yahoo/sketches/frequencies/ReversePurgeItemHashMap.java
 * author Alexander Saydakov
 */

template<typename K, typename V = uint64_t, typename H = std::hash<K>, typename E = std::equal_to<K>, typename A = std::allocator<K>>
class reverse_purge_hash_map {
public:
  // allocators for the value and state arrays, obtained by rebinding A
  using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
  using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;

  // lg_size and lg_max_size are base-2 logarithms of the initial and maximum table sizes
  reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
  reverse_purge_hash_map(const reverse_purge_hash_map& other);
  reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
  ~reverse_purge_hash_map();

  // NOTE(review): a by-value overload next to an rvalue-reference overload makes
  // assignment from an rvalue ambiguous in overload resolution - confirm whether
  // any caller needs move assignment.
  reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
  reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);

  // Both overloads return the result of resize_or_purge_if_needed() when a new
  // key was added, and 0 otherwise.
  V adjust_or_insert(const K& key, V value);
  V adjust_or_insert(K&& key, V value);

  // value stored for the key, or 0 if the key is not present
  V get(const K& key) const;

  uint8_t get_lg_cur_size() const;
  uint8_t get_lg_max_size() const;
  uint32_t get_capacity() const;   // 2^lg_cur_size scaled by LOAD_FACTOR
  uint32_t get_num_active() const; // number of active slots

  // read-only iteration over the active entries
  class iterator;
  iterator begin() const;
  iterator end() const;

private:
  static constexpr double LOAD_FACTOR = 0.75;
  static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
  static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge

  // NOTE: the constructors rely on this declaration order for member initialization
  uint8_t lg_cur_size;
  uint8_t lg_max_size;
  uint32_t num_active;
  K* keys;          // constructed only in active slots
  V* values;
  uint16_t* states; // 0 marks an inactive slot

  inline bool is_active(uint32_t probe) const;
  void subtract_and_keep_positive_only(V amount);
  void hash_delete(uint32_t probe);
  uint32_t internal_adjust_or_insert(const K& key, V value);
  V resize_or_purge_if_needed();
  void resize(uint8_t lg_new_size);
  V purge();
};
|
|
78
|
+
|
|
79
|
+
// This iterator uses strides based on golden ratio to avoid clustering during merge
|
|
80
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
81
|
+
class reverse_purge_hash_map<K, V, H, E, A>::iterator: public std::iterator<std::input_iterator_tag, K> {
|
|
82
|
+
public:
|
|
83
|
+
friend class reverse_purge_hash_map<K, V, H, E, A>;
|
|
84
|
+
iterator& operator++() {
|
|
85
|
+
++count;
|
|
86
|
+
if (count < map->num_active) {
|
|
87
|
+
const uint32_t mask = (1 << map->lg_cur_size) - 1;
|
|
88
|
+
do {
|
|
89
|
+
index = (index + stride) & mask;
|
|
90
|
+
} while (!map->is_active(index));
|
|
91
|
+
}
|
|
92
|
+
return *this;
|
|
93
|
+
}
|
|
94
|
+
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
|
|
95
|
+
bool operator==(const iterator& rhs) const { return count == rhs.count; }
|
|
96
|
+
bool operator!=(const iterator& rhs) const { return count != rhs.count; }
|
|
97
|
+
const std::pair<K&, V> operator*() const {
|
|
98
|
+
return std::pair<K&, V>(map->keys[index], map->values[index]);
|
|
99
|
+
}
|
|
100
|
+
private:
|
|
101
|
+
static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
|
|
102
|
+
const reverse_purge_hash_map<K, V, H, E, A>* map;
|
|
103
|
+
uint32_t index;
|
|
104
|
+
uint32_t count;
|
|
105
|
+
uint32_t stride;
|
|
106
|
+
iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
|
|
107
|
+
map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
} /* namespace datasketches */
|
|
111
|
+
|
|
112
|
+
#include "reverse_purge_hash_map_impl.hpp"
|
|
113
|
+
|
|
114
|
+
#endif
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef REVERSE_PURGE_HASH_MAP_IMPL_HPP_
|
|
21
|
+
#define REVERSE_PURGE_HASH_MAP_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include <memory>
|
|
24
|
+
#include <algorithm>
|
|
25
|
+
#include <iterator>
|
|
26
|
+
#include <cmath>
|
|
27
|
+
|
|
28
|
+
#include "MurmurHash3.h"
|
|
29
|
+
|
|
30
|
+
namespace datasketches {
|
|
31
|
+
|
|
32
|
+
// Out-of-class definition of the static constexpr member MAX_SAMPLE_SIZE.
// clang++ seems to require this declaration for CMAKE_BUILD_TYPE='Debug'
// (presumably because the constant is odr-used somewhere in this file - TODO confirm).
|
|
33
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
34
|
+
constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
|
|
35
|
+
|
|
36
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
37
|
+
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
|
|
38
|
+
lg_cur_size(lg_cur_size),
|
|
39
|
+
lg_max_size(lg_max_size),
|
|
40
|
+
num_active(0),
|
|
41
|
+
keys(A().allocate(1 << lg_cur_size)),
|
|
42
|
+
values(AllocV().allocate(1 << lg_cur_size)),
|
|
43
|
+
states(AllocU16().allocate(1 << lg_cur_size))
|
|
44
|
+
{
|
|
45
|
+
std::fill(states, &states[1 << lg_cur_size], 0);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
49
|
+
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
|
|
50
|
+
lg_cur_size(other.lg_cur_size),
|
|
51
|
+
lg_max_size(other.lg_max_size),
|
|
52
|
+
num_active(other.num_active),
|
|
53
|
+
keys(A().allocate(1 << lg_cur_size)),
|
|
54
|
+
values(AllocV().allocate(1 << lg_cur_size)),
|
|
55
|
+
states(AllocU16().allocate(1 << lg_cur_size))
|
|
56
|
+
{
|
|
57
|
+
const uint32_t size = 1 << lg_cur_size;
|
|
58
|
+
if (num_active > 0) {
|
|
59
|
+
auto num = num_active;
|
|
60
|
+
for (uint32_t i = 0; i < size; i++) {
|
|
61
|
+
if (other.states[i] > 0) {
|
|
62
|
+
new (&keys[i]) K(other.keys[i]);
|
|
63
|
+
values[i] = other.values[i];
|
|
64
|
+
}
|
|
65
|
+
if (--num == 0) break;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
std::copy(&other.states[0], &other.states[size], states);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
72
|
+
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
|
|
73
|
+
lg_cur_size(other.lg_cur_size),
|
|
74
|
+
lg_max_size(other.lg_max_size),
|
|
75
|
+
num_active(other.num_active),
|
|
76
|
+
keys(nullptr),
|
|
77
|
+
values(nullptr),
|
|
78
|
+
states(nullptr)
|
|
79
|
+
{
|
|
80
|
+
std::swap(keys, other.keys);
|
|
81
|
+
std::swap(values, other.values);
|
|
82
|
+
std::swap(states, other.states);
|
|
83
|
+
other.num_active = 0;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
87
|
+
reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
|
|
88
|
+
const uint32_t size = 1 << lg_cur_size;
|
|
89
|
+
if (num_active > 0) {
|
|
90
|
+
for (uint32_t i = 0; i < size; i++) {
|
|
91
|
+
if (is_active(i)) {
|
|
92
|
+
keys[i].~K();
|
|
93
|
+
if (--num_active == 0) break;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (keys != nullptr)
|
|
98
|
+
A().deallocate(keys, size);
|
|
99
|
+
if (values != nullptr)
|
|
100
|
+
AllocV().deallocate(values, size);
|
|
101
|
+
if (states != nullptr)
|
|
102
|
+
AllocU16().deallocate(states, size);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
106
|
+
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
|
|
107
|
+
std::swap(lg_cur_size, other.lg_cur_size);
|
|
108
|
+
std::swap(lg_max_size, other.lg_max_size);
|
|
109
|
+
std::swap(num_active, other.num_active);
|
|
110
|
+
std::swap(keys, other.keys);
|
|
111
|
+
std::swap(values, other.values);
|
|
112
|
+
std::swap(states, other.states);
|
|
113
|
+
return *this;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
117
|
+
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
|
|
118
|
+
std::swap(lg_cur_size, other.lg_cur_size);
|
|
119
|
+
std::swap(lg_max_size, other.lg_max_size);
|
|
120
|
+
std::swap(num_active, other.num_active);
|
|
121
|
+
std::swap(keys, other.keys);
|
|
122
|
+
std::swap(values, other.values);
|
|
123
|
+
std::swap(states, other.states);
|
|
124
|
+
return *this;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
128
|
+
V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) {
|
|
129
|
+
const uint32_t num_active_before = num_active;
|
|
130
|
+
const uint32_t index = internal_adjust_or_insert(key, value);
|
|
131
|
+
if (num_active > num_active_before) {
|
|
132
|
+
new (&keys[index]) K(key);
|
|
133
|
+
return resize_or_purge_if_needed();
|
|
134
|
+
}
|
|
135
|
+
return 0;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
139
|
+
V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
|
|
140
|
+
const uint32_t num_active_before = num_active;
|
|
141
|
+
const uint32_t index = internal_adjust_or_insert(key, value);
|
|
142
|
+
if (num_active > num_active_before) {
|
|
143
|
+
new (&keys[index]) K(std::move(key));
|
|
144
|
+
return resize_or_purge_if_needed();
|
|
145
|
+
}
|
|
146
|
+
return 0;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
150
|
+
V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
|
|
151
|
+
const uint32_t mask = (1 << lg_cur_size) - 1;
|
|
152
|
+
uint32_t probe = fmix64(H()(key)) & mask;
|
|
153
|
+
while (is_active(probe)) {
|
|
154
|
+
if (E()(keys[probe], key)) return values[probe];
|
|
155
|
+
probe = (probe + 1) & mask;
|
|
156
|
+
}
|
|
157
|
+
return 0;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
161
|
+
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
|
|
162
|
+
return lg_cur_size;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
166
|
+
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
|
|
167
|
+
return lg_max_size;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
171
|
+
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
|
|
172
|
+
return (1 << lg_cur_size) * LOAD_FACTOR;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
176
|
+
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
|
|
177
|
+
return num_active;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
181
|
+
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
|
|
182
|
+
const uint32_t size = 1 << lg_cur_size;
|
|
183
|
+
uint32_t i = 0;
|
|
184
|
+
while (i < size && !is_active(i)) i++;
|
|
185
|
+
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
189
|
+
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
|
|
190
|
+
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
194
|
+
bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
|
|
195
|
+
return states[index] > 0;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
199
|
+
void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
|
|
200
|
+
// starting from the back, find the first empty cell,
|
|
201
|
+
// which establishes the high end of a cluster.
|
|
202
|
+
uint32_t first_probe = (1 << lg_cur_size) - 1;
|
|
203
|
+
while (is_active(first_probe)) first_probe--;
|
|
204
|
+
// when we find the next non-empty cell, we know we are at the high end of a cluster
|
|
205
|
+
// work towards the front, delete any non-positive entries.
|
|
206
|
+
for (uint32_t probe = first_probe; probe-- > 0;) {
|
|
207
|
+
if (is_active(probe)) {
|
|
208
|
+
if (values[probe] <= amount) {
|
|
209
|
+
hash_delete(probe); // does the work of deletion and moving higher items towards the front
|
|
210
|
+
num_active--;
|
|
211
|
+
} else {
|
|
212
|
+
values[probe] -= amount;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
// now work on the first cluster that was skipped
|
|
217
|
+
for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) {
|
|
218
|
+
if (is_active(probe)) {
|
|
219
|
+
if (values[probe] <= amount) {
|
|
220
|
+
hash_delete(probe);
|
|
221
|
+
num_active--;
|
|
222
|
+
} else {
|
|
223
|
+
values[probe] -= amount;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
230
|
+
void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
|
|
231
|
+
// Looks ahead in the table to search for another
|
|
232
|
+
// item to move to this location
|
|
233
|
+
// if none are found, the status is changed
|
|
234
|
+
states[delete_index] = 0; // mark as empty
|
|
235
|
+
keys[delete_index].~K();
|
|
236
|
+
uint32_t drift = 1;
|
|
237
|
+
const uint32_t mask = (1 << lg_cur_size) - 1;
|
|
238
|
+
uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
|
|
239
|
+
// advance until we find a free location replacing locations as needed
|
|
240
|
+
while (is_active(probe)) {
|
|
241
|
+
if (states[probe] > drift) {
|
|
242
|
+
// move current element
|
|
243
|
+
new (&keys[delete_index]) K(std::move(keys[probe]));
|
|
244
|
+
values[delete_index] = values[probe];
|
|
245
|
+
states[delete_index] = states[probe] - drift;
|
|
246
|
+
states[probe] = 0; // mark as empty
|
|
247
|
+
keys[probe].~K();
|
|
248
|
+
drift = 0;
|
|
249
|
+
delete_index = probe;
|
|
250
|
+
}
|
|
251
|
+
probe = (probe + 1) & mask;
|
|
252
|
+
drift++;
|
|
253
|
+
// only used for theoretical analysis
|
|
254
|
+
if (drift >= DRIFT_LIMIT) throw std::logic_error("drift: " + std::to_string(drift) + " >= DRIFT_LIMIT");
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
259
|
+
uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
|
|
260
|
+
const uint32_t mask = (1 << lg_cur_size) - 1;
|
|
261
|
+
uint32_t index = fmix64(H()(key)) & mask;
|
|
262
|
+
uint16_t drift = 1;
|
|
263
|
+
while (is_active(index)) {
|
|
264
|
+
if (E()(keys[index], key)) {
|
|
265
|
+
// adjusting the value of an existing key
|
|
266
|
+
values[index] += value;
|
|
267
|
+
return index;
|
|
268
|
+
}
|
|
269
|
+
index = (index + 1) & mask;
|
|
270
|
+
drift++;
|
|
271
|
+
// only used for theoretical analysis
|
|
272
|
+
if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
|
|
273
|
+
}
|
|
274
|
+
// adding the key and value to the table
|
|
275
|
+
if (num_active > get_capacity()) {
|
|
276
|
+
throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity()));
|
|
277
|
+
}
|
|
278
|
+
values[index] = value;
|
|
279
|
+
states[index] = drift;
|
|
280
|
+
num_active++;
|
|
281
|
+
return index;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
285
|
+
V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
|
|
286
|
+
if (num_active > get_capacity()) {
|
|
287
|
+
if (lg_cur_size < lg_max_size) { // can grow
|
|
288
|
+
resize(lg_cur_size + 1);
|
|
289
|
+
} else { // at target size, must purge
|
|
290
|
+
const V offset = purge();
|
|
291
|
+
if (num_active > get_capacity()) {
|
|
292
|
+
throw std::logic_error("purge did not reduce number of active items");
|
|
293
|
+
}
|
|
294
|
+
return offset;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
return 0;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
301
|
+
void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
|
|
302
|
+
const uint32_t old_size = 1 << lg_cur_size;
|
|
303
|
+
K* old_keys = keys;
|
|
304
|
+
V* old_values = values;
|
|
305
|
+
uint16_t* old_states = states;
|
|
306
|
+
const uint32_t new_size = 1 << lg_new_size;
|
|
307
|
+
keys = A().allocate(new_size);
|
|
308
|
+
values = AllocV().allocate(new_size);
|
|
309
|
+
states = AllocU16().allocate(new_size);
|
|
310
|
+
std::fill(states, &states[new_size], 0);
|
|
311
|
+
num_active = 0;
|
|
312
|
+
lg_cur_size = lg_new_size;
|
|
313
|
+
for (uint32_t i = 0; i < old_size; i++) {
|
|
314
|
+
if (old_states[i] > 0) {
|
|
315
|
+
adjust_or_insert(std::move(old_keys[i]), old_values[i]);
|
|
316
|
+
old_keys[i].~K();
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
A().deallocate(old_keys, old_size);
|
|
320
|
+
AllocV().deallocate(old_values, old_size);
|
|
321
|
+
AllocU16().deallocate(old_states, old_size);
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
template<typename K, typename V, typename H, typename E, typename A>
|
|
325
|
+
V reverse_purge_hash_map<K, V, H, E, A>::purge() {
|
|
326
|
+
const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active);
|
|
327
|
+
uint32_t num_samples = 0;
|
|
328
|
+
uint32_t i = 0;
|
|
329
|
+
V* samples = AllocV().allocate(limit);
|
|
330
|
+
while (num_samples < limit) {
|
|
331
|
+
if (is_active(i)) {
|
|
332
|
+
samples[num_samples++] = values[i];
|
|
333
|
+
}
|
|
334
|
+
i++;
|
|
335
|
+
}
|
|
336
|
+
std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
|
|
337
|
+
const V median = samples[num_samples / 2];
|
|
338
|
+
AllocV().deallocate(samples, limit);
|
|
339
|
+
subtract_and_keep_positive_only(median);
|
|
340
|
+
return median;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
} /* namespace datasketches */
|
|
344
|
+
|
|
345
|
+
# endif
|