datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
|
|
22
|
+
#include <reverse_purge_hash_map.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
|
|
27
|
+
reverse_purge_hash_map<int> map(3, 3);
|
|
28
|
+
REQUIRE(map.get_num_active() == 0);
|
|
29
|
+
REQUIRE(map.get_lg_cur_size() == 3); // static_cast<uint8_t>(3)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
|
|
33
|
+
reverse_purge_hash_map<int> map(3, 3);
|
|
34
|
+
map.adjust_or_insert(1, 1);
|
|
35
|
+
REQUIRE(map.get_num_active() == 1);
|
|
36
|
+
REQUIRE(map.get(1) == 1);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
|
|
40
|
+
reverse_purge_hash_map<int> map(3, 4);
|
|
41
|
+
for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
|
|
42
|
+
int sum = 0;
|
|
43
|
+
for (auto &it: map) sum += it.second;
|
|
44
|
+
REQUIRE(sum == 11);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Header-only (INTERFACE) HLL library, also reachable as ${PROJECT_NAME}::HLL.
add_library(hll INTERFACE)

add_library(${PROJECT_NAME}::HLL ALIAS hll)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

target_include_directories(hll
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(hll INTERFACE common)
target_compile_features(hll INTERFACE cxx_std_11)

# Single source of truth for the header set: the same list feeds both
# install(FILES) and target_sources() below, so the two can no longer drift.
set(hll_HEADERS
  include/hll.hpp
  include/AuxHashMap.hpp
  include/CompositeInterpolationXTable.hpp
  # NOTE(review): the next two headers live in hll/include alongside the
  # others but were missing from both the install list and target_sources;
  # confirm that installed consumers need them.
  include/hll.private.hpp
  include/HllSketchImplFactory.hpp
  include/CouponHashSet.hpp
  include/CouponList.hpp
  include/CubicInterpolation.hpp
  include/HarmonicNumbers.hpp
  include/Hll4Array.hpp
  include/Hll6Array.hpp
  include/Hll8Array.hpp
  include/HllArray.hpp
  include/HllSketchImpl.hpp
  include/HllUtil.hpp
  include/coupon_iterator.hpp
  include/RelativeErrorTables.hpp
  include/AuxHashMap-internal.hpp
  include/CompositeInterpolationXTable-internal.hpp
  include/CouponHashSet-internal.hpp
  include/CouponList-internal.hpp
  include/CubicInterpolation-internal.hpp
  include/HarmonicNumbers-internal.hpp
  include/Hll4Array-internal.hpp
  include/Hll6Array-internal.hpp
  include/Hll8Array-internal.hpp
  include/HllArray-internal.hpp
  include/HllSketch-internal.hpp
  include/HllSketchImpl-internal.hpp
  include/HllUnion-internal.hpp
  include/coupon_iterator-internal.hpp
  include/RelativeErrorTables-internal.hpp
)

install(TARGETS hll
  EXPORT ${PROJECT_NAME}
)

install(FILES ${hll_HEADERS}
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

# Attach every header to the interface target using an absolute path, exactly
# as the hand-written list did before.
foreach(hll_header IN LISTS hll_HEADERS)
  target_sources(hll
    INTERFACE
      "${CMAKE_CURRENT_SOURCE_DIR}/${hll_header}"
  )
endforeach()
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _AUXHASHMAP_INTERNAL_HPP_
|
|
21
|
+
#define _AUXHASHMAP_INTERNAL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "HllUtil.hpp"
|
|
24
|
+
#include "AuxHashMap.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
template<typename A>
|
|
29
|
+
AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
|
|
30
|
+
: lgConfigK(lgConfigK),
|
|
31
|
+
lgAuxArrInts(lgAuxArrInts),
|
|
32
|
+
auxCount(0) {
|
|
33
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
34
|
+
const int numItems = 1 << lgAuxArrInts;
|
|
35
|
+
auxIntArr = intAlloc().allocate(numItems);
|
|
36
|
+
std::fill(auxIntArr, auxIntArr + numItems, 0);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
template<typename A>
|
|
40
|
+
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
|
|
41
|
+
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
template<typename A>
|
|
45
|
+
AuxHashMap<A>::AuxHashMap(const AuxHashMap& that)
|
|
46
|
+
: lgConfigK(that.lgConfigK),
|
|
47
|
+
lgAuxArrInts(that.lgAuxArrInts),
|
|
48
|
+
auxCount(that.auxCount) {
|
|
49
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
50
|
+
const int numItems = 1 << lgAuxArrInts;
|
|
51
|
+
auxIntArr = intAlloc().allocate(numItems);
|
|
52
|
+
std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
template<typename A>
|
|
56
|
+
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
|
|
57
|
+
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
template<typename A>
|
|
61
|
+
AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
62
|
+
int lgConfigK,
|
|
63
|
+
int auxCount, int lgAuxArrInts,
|
|
64
|
+
bool srcCompact) {
|
|
65
|
+
int lgArrInts = lgAuxArrInts;
|
|
66
|
+
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
67
|
+
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
68
|
+
} else { // updatable
|
|
69
|
+
lgArrInts = lgAuxArrInts;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
int configKmask = (1 << lgConfigK) - 1;
|
|
73
|
+
|
|
74
|
+
AuxHashMap<A>* auxHashMap;
|
|
75
|
+
const int* auxPtr = static_cast<const int*>(bytes);
|
|
76
|
+
if (srcCompact) {
|
|
77
|
+
if (len < auxCount * sizeof(int)) {
|
|
78
|
+
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
79
|
+
}
|
|
80
|
+
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
81
|
+
for (int i = 0; i < auxCount; ++i) {
|
|
82
|
+
int pair = auxPtr[i];
|
|
83
|
+
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
84
|
+
int value = HllUtil<A>::getValue(pair);
|
|
85
|
+
auxHashMap->mustAdd(slotNo, value);
|
|
86
|
+
}
|
|
87
|
+
} else { // updatable
|
|
88
|
+
int itemsToRead = 1 << lgAuxArrInts;
|
|
89
|
+
if (len < itemsToRead * sizeof(int)) {
|
|
90
|
+
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
91
|
+
}
|
|
92
|
+
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
93
|
+
for (int i = 0; i < itemsToRead; ++i) {
|
|
94
|
+
int pair = auxPtr[i];
|
|
95
|
+
if (pair == HllUtil<A>::EMPTY) { continue; }
|
|
96
|
+
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
97
|
+
int value = HllUtil<A>::getValue(pair);
|
|
98
|
+
auxHashMap->mustAdd(slotNo, value);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
if (auxHashMap->getAuxCount() != auxCount) {
|
|
103
|
+
make_deleter()(auxHashMap);
|
|
104
|
+
throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
return auxHashMap;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
template<typename A>
|
|
111
|
+
AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
112
|
+
const int auxCount, const int lgAuxArrInts,
|
|
113
|
+
const bool srcCompact) {
|
|
114
|
+
int lgArrInts = lgAuxArrInts;
|
|
115
|
+
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
116
|
+
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
117
|
+
} else { // updatable
|
|
118
|
+
lgArrInts = lgAuxArrInts;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
122
|
+
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
|
123
|
+
aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
|
|
124
|
+
|
|
125
|
+
int configKmask = (1 << lgConfigK) - 1;
|
|
126
|
+
|
|
127
|
+
if (srcCompact) {
|
|
128
|
+
int pair;
|
|
129
|
+
for (int i = 0; i < auxCount; ++i) {
|
|
130
|
+
is.read((char*)&pair, sizeof(pair));
|
|
131
|
+
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
132
|
+
int value = HllUtil<A>::getValue(pair);
|
|
133
|
+
auxHashMap->mustAdd(slotNo, value);
|
|
134
|
+
}
|
|
135
|
+
} else { // updatable
|
|
136
|
+
int itemsToRead = 1 << lgAuxArrInts;
|
|
137
|
+
int pair;
|
|
138
|
+
for (int i = 0; i < itemsToRead; ++i) {
|
|
139
|
+
is.read((char*)&pair, sizeof(pair));
|
|
140
|
+
if (pair == HllUtil<A>::EMPTY) { continue; }
|
|
141
|
+
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
142
|
+
int value = HllUtil<A>::getValue(pair);
|
|
143
|
+
auxHashMap->mustAdd(slotNo, value);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if (auxHashMap->getAuxCount() != auxCount) {
|
|
148
|
+
make_deleter()(auxHashMap);
|
|
149
|
+
throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
return aux_ptr.release();
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
template<typename A>
|
|
156
|
+
AuxHashMap<A>::~AuxHashMap<A>() {
|
|
157
|
+
// should be no way to have an object without a valid array
|
|
158
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
159
|
+
intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
template<typename A>
|
|
163
|
+
std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
|
|
164
|
+
return [](AuxHashMap<A>* ptr) {
|
|
165
|
+
ptr->~AuxHashMap();
|
|
166
|
+
ahmAlloc().deallocate(ptr, 1);
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template<typename A>
|
|
171
|
+
AuxHashMap<A>* AuxHashMap<A>::copy() const {
|
|
172
|
+
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
template<typename A>
|
|
176
|
+
int AuxHashMap<A>::getAuxCount() const {
|
|
177
|
+
return auxCount;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
template<typename A>
|
|
181
|
+
int* AuxHashMap<A>::getAuxIntArr(){
|
|
182
|
+
return auxIntArr;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
template<typename A>
|
|
186
|
+
int AuxHashMap<A>::getLgAuxArrInts() const {
|
|
187
|
+
return lgAuxArrInts;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
template<typename A>
|
|
191
|
+
int AuxHashMap<A>::getCompactSizeBytes() const {
|
|
192
|
+
return auxCount << 2;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
template<typename A>
|
|
196
|
+
int AuxHashMap<A>::getUpdatableSizeBytes() const {
|
|
197
|
+
return 4 << lgAuxArrInts;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
template<typename A>
|
|
201
|
+
void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
|
202
|
+
const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
|
|
203
|
+
const int entry_pair = HllUtil<A>::pair(slotNo, value);
|
|
204
|
+
if (index >= 0) {
|
|
205
|
+
throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
|
|
206
|
+
+ std::to_string(slotNo) + ", Value: " + std::to_string(value));
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// found empty entry
|
|
210
|
+
auxIntArr[~index] = entry_pair;
|
|
211
|
+
++auxCount;
|
|
212
|
+
checkGrow();
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
template<typename A>
|
|
216
|
+
int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
|
217
|
+
const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
|
|
218
|
+
if (index >= 0) {
|
|
219
|
+
return HllUtil<A>::getValue(auxIntArr[index]);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
template<typename A>
|
|
226
|
+
void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
|
|
227
|
+
const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
|
|
228
|
+
if (idx >= 0) {
|
|
229
|
+
auxIntArr[idx] = HllUtil<A>::pair(slotNo, value);
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
throw std::invalid_argument("Pair not found: SlotNo: " + std::to_string(slotNo)
|
|
234
|
+
+ ", Value: " + std::to_string(value));
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
template<typename A>
|
|
238
|
+
void AuxHashMap<A>::checkGrow() {
|
|
239
|
+
if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
|
|
240
|
+
growAuxSpace();
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
template<typename A>
|
|
245
|
+
void AuxHashMap<A>::growAuxSpace() {
|
|
246
|
+
int* oldArray = auxIntArr;
|
|
247
|
+
const int oldArrLen = 1 << lgAuxArrInts;
|
|
248
|
+
const int configKmask = (1 << lgConfigK) - 1;
|
|
249
|
+
const int newArrLen = 1 << ++lgAuxArrInts;
|
|
250
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
251
|
+
auxIntArr = intAlloc().allocate(newArrLen);
|
|
252
|
+
std::fill(auxIntArr, auxIntArr + newArrLen, 0);
|
|
253
|
+
for (int i = 0; i < oldArrLen; ++i) {
|
|
254
|
+
const int fetched = oldArray[i];
|
|
255
|
+
if (fetched != HllUtil<A>::EMPTY) {
|
|
256
|
+
// find empty in new array
|
|
257
|
+
const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask);
|
|
258
|
+
auxIntArr[~idx] = fetched;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
intAlloc().deallocate(oldArray, oldArrLen);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
//Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
|
|
266
|
+
//If entire entry is empty, returns one's complement of index = found empty.
|
|
267
|
+
//If entry contains given slotNo, returns its index = found slotNo.
|
|
268
|
+
//Continues searching.
|
|
269
|
+
//If the probe comes back to original index, throws an exception.
|
|
270
|
+
template<typename A>
|
|
271
|
+
int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
|
|
272
|
+
const int slotNo) {
|
|
273
|
+
const int auxArrMask = (1 << lgAuxArrInts) - 1;
|
|
274
|
+
const int configKmask = (1 << lgConfigK) - 1;
|
|
275
|
+
int probe = slotNo & auxArrMask;
|
|
276
|
+
const int loopIndex = probe;
|
|
277
|
+
do {
|
|
278
|
+
const int arrVal = auxArr[probe];
|
|
279
|
+
if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
|
|
280
|
+
return ~probe; //empty
|
|
281
|
+
}
|
|
282
|
+
else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
|
|
283
|
+
return probe; //found given slotNo, return probe = index into aux array
|
|
284
|
+
}
|
|
285
|
+
const int stride = (slotNo >> lgAuxArrInts) | 1;
|
|
286
|
+
probe = (probe + stride) & auxArrMask;
|
|
287
|
+
} while (probe != loopIndex);
|
|
288
|
+
throw std::runtime_error("Key not found and no empty slots!");
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
template<typename A>
|
|
292
|
+
coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
|
|
293
|
+
return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
template<typename A>
|
|
297
|
+
coupon_iterator<A> AuxHashMap<A>::end() const {
|
|
298
|
+
return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
#endif // _AUXHASHMAP_INTERNAL_HPP_
|