datasketches 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -22,13 +22,15 @@
|
|
22
22
|
|
23
23
|
#include "frequent_items_sketch.hpp"
|
24
24
|
#include "test_type.hpp"
|
25
|
+
#include "test_allocator.hpp"
|
25
26
|
|
26
27
|
namespace datasketches {
|
27
28
|
|
28
|
-
|
29
|
+
using frequent_test_type_sketch = frequent_items_sketch<test_type, float, test_type_hash, test_type_equal, test_type_serde, test_allocator<test_type>>;
|
30
|
+
using alloc = test_allocator<test_type>;
|
29
31
|
|
30
32
|
TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
|
31
|
-
frequent_test_type_sketch sketch(3);
|
33
|
+
frequent_test_type_sketch sketch(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
32
34
|
sketch.update(1, 10); // should survive the purge
|
33
35
|
sketch.update(2);
|
34
36
|
sketch.update(3);
|
@@ -41,35 +43,36 @@ TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
|
|
41
43
|
REQUIRE_FALSE(sketch.is_empty());
|
42
44
|
REQUIRE(sketch.get_total_weight() == 17);
|
43
45
|
REQUIRE(sketch.get_estimate(1) == 10);
|
44
|
-
//std::cerr << "num active: " << sketch.get_num_active_items() << std::endl;
|
45
46
|
|
46
|
-
//std::cerr << "get frequent items" << std::endl;
|
47
47
|
auto items = sketch.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES);
|
48
48
|
REQUIRE(items.size() == 1); // only 1 item should be above threshold
|
49
49
|
REQUIRE(items[0].get_item().get_value() == 1);
|
50
50
|
REQUIRE(items[0].get_estimate() == 10);
|
51
51
|
|
52
52
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
53
|
-
//std::cerr << "serialize" << std::endl;
|
54
53
|
sketch.serialize(s);
|
55
|
-
|
56
|
-
auto sketch2 = frequent_test_type_sketch::deserialize(s);
|
54
|
+
auto sketch2 = frequent_test_type_sketch::deserialize(s, alloc(0));
|
57
55
|
REQUIRE_FALSE(sketch2.is_empty());
|
58
56
|
REQUIRE(sketch2.get_total_weight() == 17);
|
59
57
|
REQUIRE(sketch2.get_estimate(1) == 10);
|
60
58
|
REQUIRE(sketch.get_num_active_items() == sketch2.get_num_active_items());
|
61
59
|
REQUIRE(sketch.get_maximum_error() == sketch2.get_maximum_error());
|
62
|
-
//std::cerr << "end" << std::endl;
|
63
60
|
|
64
|
-
|
61
|
+
auto bytes = sketch.serialize();
|
62
|
+
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), 0);
|
63
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
64
|
+
REQUIRE(sketch3.get_total_weight() == 17);
|
65
|
+
REQUIRE(sketch3.get_estimate(1) == 10);
|
66
|
+
REQUIRE(sketch.get_num_active_items() == sketch3.get_num_active_items());
|
67
|
+
REQUIRE(sketch.get_maximum_error() == sketch3.get_maximum_error());
|
65
68
|
}
|
66
69
|
|
67
70
|
// this is to see the debug print from test_type if enabled there to make sure items are moved
|
68
71
|
TEST_CASE("frequent items: moving merge", "[frequent_items_sketch]") {
|
69
|
-
frequent_test_type_sketch sketch1(3);
|
72
|
+
frequent_test_type_sketch sketch1(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
70
73
|
sketch1.update(1);
|
71
74
|
|
72
|
-
frequent_test_type_sketch sketch2(3);
|
75
|
+
frequent_test_type_sketch sketch2(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
73
76
|
sketch2.update(2);
|
74
77
|
|
75
78
|
sketch2.merge(std::move(sketch1));
|
@@ -77,7 +80,7 @@ TEST_CASE("frequent items: moving merge", "[frequent_items_sketch]") {
|
|
77
80
|
}
|
78
81
|
|
79
82
|
TEST_CASE("frequent items: negative weight", "[frequent_items_sketch]") {
|
80
|
-
frequent_test_type_sketch sketch(3);
|
83
|
+
frequent_test_type_sketch sketch(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
81
84
|
REQUIRE_THROWS_AS(sketch.update(1, -1), std::invalid_argument);
|
82
85
|
}
|
83
86
|
|
@@ -24,20 +24,20 @@
|
|
24
24
|
namespace datasketches {
|
25
25
|
|
26
26
|
TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
|
27
|
-
reverse_purge_hash_map<int> map(3, 3);
|
27
|
+
reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
|
28
28
|
REQUIRE(map.get_num_active() == 0);
|
29
29
|
REQUIRE(map.get_lg_cur_size() == 3); // static_cast<uint8_t>(3)
|
30
30
|
}
|
31
31
|
|
32
32
|
TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
|
33
|
-
reverse_purge_hash_map<int> map(3, 3);
|
33
|
+
reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
|
34
34
|
map.adjust_or_insert(1, 1);
|
35
35
|
REQUIRE(map.get_num_active() == 1);
|
36
36
|
REQUIRE(map.get(1) == 1);
|
37
37
|
}
|
38
38
|
|
39
39
|
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
|
40
|
-
reverse_purge_hash_map<int> map(3, 4);
|
40
|
+
reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
|
41
41
|
for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
|
42
42
|
int sum = 0;
|
43
43
|
for (auto &it: map) sum += it.second;
|
@@ -35,6 +35,7 @@ target_compile_features(hll INTERFACE cxx_std_11)
|
|
35
35
|
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
|
36
36
|
set(hll_HEADERS "")
|
37
37
|
list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
|
38
|
+
list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
|
38
39
|
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
|
39
40
|
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
|
40
41
|
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
|
@@ -60,6 +61,7 @@ install(FILES ${hll_HEADERS}
|
|
60
61
|
target_sources(hll
|
61
62
|
INTERFACE
|
62
63
|
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
|
64
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
|
63
65
|
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
|
64
66
|
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
|
65
67
|
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
|
@@ -71,6 +73,7 @@ target_sources(hll
|
|
71
73
|
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
|
72
74
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
|
73
75
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
|
76
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
|
74
77
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
|
75
78
|
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
|
76
79
|
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
|
@@ -26,42 +26,28 @@
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
28
|
template<typename A>
|
29
|
-
AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
auxIntArr = intAlloc().allocate(numItems);
|
36
|
-
std::fill(auxIntArr, auxIntArr + numItems, 0);
|
37
|
-
}
|
38
|
-
|
39
|
-
template<typename A>
|
40
|
-
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
|
41
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
|
42
|
-
}
|
29
|
+
AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
|
30
|
+
lgConfigK(lgConfigK),
|
31
|
+
lgAuxArrInts(lgAuxArrInts),
|
32
|
+
auxCount(0),
|
33
|
+
entries(1 << lgAuxArrInts, 0, allocator)
|
34
|
+
{}
|
43
35
|
|
44
36
|
template<typename A>
|
45
|
-
AuxHashMap<A>::
|
46
|
-
|
47
|
-
lgAuxArrInts(that.lgAuxArrInts),
|
48
|
-
auxCount(that.auxCount) {
|
49
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
50
|
-
const int numItems = 1 << lgAuxArrInts;
|
51
|
-
auxIntArr = intAlloc().allocate(numItems);
|
52
|
-
std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
|
37
|
+
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
|
38
|
+
return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
|
53
39
|
}
|
54
40
|
|
55
41
|
template<typename A>
|
56
42
|
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
|
57
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
|
43
|
+
return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
|
58
44
|
}
|
59
45
|
|
60
46
|
template<typename A>
|
61
47
|
AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
62
48
|
int lgConfigK,
|
63
49
|
int auxCount, int lgAuxArrInts,
|
64
|
-
bool srcCompact) {
|
50
|
+
bool srcCompact, const A& allocator) {
|
65
51
|
int lgArrInts = lgAuxArrInts;
|
66
52
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
67
53
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
@@ -77,7 +63,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
77
63
|
if (len < auxCount * sizeof(int)) {
|
78
64
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
79
65
|
}
|
80
|
-
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
66
|
+
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
81
67
|
for (int i = 0; i < auxCount; ++i) {
|
82
68
|
int pair = auxPtr[i];
|
83
69
|
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
@@ -89,7 +75,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
89
75
|
if (len < itemsToRead * sizeof(int)) {
|
90
76
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
91
77
|
}
|
92
|
-
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
78
|
+
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
93
79
|
for (int i = 0; i < itemsToRead; ++i) {
|
94
80
|
int pair = auxPtr[i];
|
95
81
|
if (pair == HllUtil<A>::EMPTY) { continue; }
|
@@ -110,7 +96,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
110
96
|
template<typename A>
|
111
97
|
AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
112
98
|
const int auxCount, const int lgAuxArrInts,
|
113
|
-
const bool srcCompact) {
|
99
|
+
const bool srcCompact, const A& allocator) {
|
114
100
|
int lgArrInts = lgAuxArrInts;
|
115
101
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
116
102
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
@@ -118,7 +104,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
118
104
|
lgArrInts = lgAuxArrInts;
|
119
105
|
}
|
120
106
|
|
121
|
-
AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
107
|
+
AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
122
108
|
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
123
109
|
aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
|
124
110
|
|
@@ -152,24 +138,18 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
152
138
|
return aux_ptr.release();
|
153
139
|
}
|
154
140
|
|
155
|
-
template<typename A>
|
156
|
-
AuxHashMap<A>::~AuxHashMap<A>() {
|
157
|
-
// should be no way to have an object without a valid array
|
158
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
159
|
-
intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
|
160
|
-
}
|
161
|
-
|
162
141
|
template<typename A>
|
163
142
|
std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
|
164
143
|
return [](AuxHashMap<A>* ptr) {
|
144
|
+
ahmAlloc alloc(ptr->entries.get_allocator());
|
165
145
|
ptr->~AuxHashMap();
|
166
|
-
|
146
|
+
alloc.deallocate(ptr, 1);
|
167
147
|
};
|
168
148
|
}
|
169
149
|
|
170
150
|
template<typename A>
|
171
151
|
AuxHashMap<A>* AuxHashMap<A>::copy() const {
|
172
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
|
152
|
+
return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
|
173
153
|
}
|
174
154
|
|
175
155
|
template<typename A>
|
@@ -179,7 +159,7 @@ int AuxHashMap<A>::getAuxCount() const {
|
|
179
159
|
|
180
160
|
template<typename A>
|
181
161
|
int* AuxHashMap<A>::getAuxIntArr(){
|
182
|
-
return
|
162
|
+
return entries.data();
|
183
163
|
}
|
184
164
|
|
185
165
|
template<typename A>
|
@@ -199,7 +179,7 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const {
|
|
199
179
|
|
200
180
|
template<typename A>
|
201
181
|
void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
202
|
-
const int index = find(
|
182
|
+
const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
203
183
|
const int entry_pair = HllUtil<A>::pair(slotNo, value);
|
204
184
|
if (index >= 0) {
|
205
185
|
throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
|
@@ -207,16 +187,16 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
|
207
187
|
}
|
208
188
|
|
209
189
|
// found empty entry
|
210
|
-
|
190
|
+
entries[~index] = entry_pair;
|
211
191
|
++auxCount;
|
212
192
|
checkGrow();
|
213
193
|
}
|
214
194
|
|
215
195
|
template<typename A>
|
216
196
|
int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
217
|
-
const int index = find(
|
197
|
+
const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
218
198
|
if (index >= 0) {
|
219
|
-
return HllUtil<A>::getValue(
|
199
|
+
return HllUtil<A>::getValue(entries[index]);
|
220
200
|
}
|
221
201
|
|
222
202
|
throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
|
@@ -224,9 +204,9 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
|
224
204
|
|
225
205
|
template<typename A>
|
226
206
|
void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
|
227
|
-
const int idx = find(
|
207
|
+
const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
228
208
|
if (idx >= 0) {
|
229
|
-
|
209
|
+
entries[idx] = HllUtil<A>::pair(slotNo, value);
|
230
210
|
return;
|
231
211
|
}
|
232
212
|
|
@@ -243,23 +223,18 @@ void AuxHashMap<A>::checkGrow() {
|
|
243
223
|
|
244
224
|
template<typename A>
|
245
225
|
void AuxHashMap<A>::growAuxSpace() {
|
246
|
-
int* oldArray = auxIntArr;
|
247
|
-
const int oldArrLen = 1 << lgAuxArrInts;
|
248
226
|
const int configKmask = (1 << lgConfigK) - 1;
|
249
227
|
const int newArrLen = 1 << ++lgAuxArrInts;
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
for (int i = 0; i < oldArrLen; ++i) {
|
254
|
-
const int fetched = oldArray[i];
|
228
|
+
vector_int entries_new(newArrLen, 0, entries.get_allocator());
|
229
|
+
for (size_t i = 0; i < entries.size(); ++i) {
|
230
|
+
const int fetched = entries[i];
|
255
231
|
if (fetched != HllUtil<A>::EMPTY) {
|
256
232
|
// find empty in new array
|
257
|
-
const int idx = find(
|
258
|
-
|
233
|
+
const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
|
234
|
+
entries_new[~idx] = fetched;
|
259
235
|
}
|
260
236
|
}
|
261
|
-
|
262
|
-
intAlloc().deallocate(oldArray, oldArrLen);
|
237
|
+
entries = std::move(entries_new);
|
263
238
|
}
|
264
239
|
|
265
240
|
//Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
|
@@ -290,12 +265,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
|
|
290
265
|
|
291
266
|
template<typename A>
|
292
267
|
coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
|
293
|
-
return coupon_iterator<A>(
|
268
|
+
return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
|
294
269
|
}
|
295
270
|
|
296
271
|
template<typename A>
|
297
272
|
coupon_iterator<A> AuxHashMap<A>::end() const {
|
298
|
-
return coupon_iterator<A>(
|
273
|
+
return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
|
299
274
|
}
|
300
275
|
|
301
276
|
}
|
@@ -28,22 +28,21 @@
|
|
28
28
|
|
29
29
|
namespace datasketches {
|
30
30
|
|
31
|
-
template<typename A
|
31
|
+
template<typename A>
|
32
32
|
class AuxHashMap final {
|
33
33
|
public:
|
34
|
-
|
35
|
-
|
36
|
-
static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK);
|
34
|
+
AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
|
35
|
+
static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
|
37
36
|
static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
|
38
37
|
|
39
38
|
static AuxHashMap* deserialize(const void* bytes, size_t len,
|
40
39
|
int lgConfigK,
|
41
40
|
int auxCount, int lgAuxArrInts,
|
42
|
-
bool srcCompact);
|
41
|
+
bool srcCompact, const A& allocator);
|
43
42
|
static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
|
44
43
|
int auxCount, int lgAuxArrInts,
|
45
|
-
bool srcCompact);
|
46
|
-
virtual ~AuxHashMap();
|
44
|
+
bool srcCompact, const A& allocator);
|
45
|
+
virtual ~AuxHashMap() = default;
|
47
46
|
static std::function<void(AuxHashMap<A>*)> make_deleter();
|
48
47
|
|
49
48
|
AuxHashMap* copy() const;
|
@@ -64,6 +63,8 @@ class AuxHashMap final {
|
|
64
63
|
private:
|
65
64
|
typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
|
66
65
|
|
66
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
67
|
+
|
67
68
|
// static so it can be used when resizing
|
68
69
|
static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
|
69
70
|
|
@@ -73,7 +74,7 @@ class AuxHashMap final {
|
|
73
74
|
const int lgConfigK;
|
74
75
|
int lgAuxArrInts;
|
75
76
|
int auxCount;
|
76
|
-
|
77
|
+
vector_int entries;
|
77
78
|
};
|
78
79
|
|
79
80
|
}
|
@@ -24,7 +24,7 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
-
template<typename A = std::allocator<
|
27
|
+
template<typename A = std::allocator<uint8_t>>
|
28
28
|
class CompositeInterpolationXTable {
|
29
29
|
public:
|
30
30
|
static int get_y_stride(int logK);
|
@@ -37,4 +37,4 @@ class CompositeInterpolationXTable {
|
|
37
37
|
|
38
38
|
#include "CompositeInterpolationXTable-internal.hpp"
|
39
39
|
|
40
|
-
#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
|
40
|
+
#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
|
@@ -31,8 +31,8 @@ template<typename A>
|
|
31
31
|
static int find(const int* array, const int lgArrInts, const int coupon);
|
32
32
|
|
33
33
|
template<typename A>
|
34
|
-
CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
|
35
|
-
: CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
|
34
|
+
CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator)
|
35
|
+
: CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
|
36
36
|
{
|
37
37
|
if (lgConfigK <= 7) {
|
38
38
|
throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
|
@@ -40,28 +40,22 @@ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHl
|
|
40
40
|
}
|
41
41
|
}
|
42
42
|
|
43
|
-
template<typename A>
|
44
|
-
CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
|
45
|
-
: CouponList<A>(that) {}
|
46
|
-
|
47
43
|
template<typename A>
|
48
44
|
CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
|
49
45
|
: CouponList<A>(that, tgtHllType) {}
|
50
46
|
|
51
|
-
template<typename A>
|
52
|
-
CouponHashSet<A>::~CouponHashSet() {}
|
53
|
-
|
54
47
|
template<typename A>
|
55
48
|
std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
|
56
49
|
return [](HllSketchImpl<A>* ptr) {
|
57
50
|
CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
|
51
|
+
ChsAlloc chsa(chs->getAllocator());
|
58
52
|
chs->~CouponHashSet();
|
59
|
-
|
53
|
+
chsa.deallocate(chs, 1);
|
60
54
|
};
|
61
55
|
}
|
62
56
|
|
63
57
|
template<typename A>
|
64
|
-
CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
58
|
+
CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) {
|
65
59
|
if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded
|
66
60
|
throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
|
67
61
|
}
|
@@ -79,7 +73,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
79
73
|
|
80
74
|
const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
|
81
75
|
if (mode != SET) {
|
82
|
-
throw std::invalid_argument("Calling set
|
76
|
+
throw std::invalid_argument("Calling set constructor with non-set mode data");
|
83
77
|
}
|
84
78
|
|
85
79
|
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
|
@@ -106,7 +100,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
106
100
|
+ ", found: " + std::to_string(len));
|
107
101
|
}
|
108
102
|
|
109
|
-
|
103
|
+
ChsAlloc chsa(allocator);
|
104
|
+
CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
|
110
105
|
|
111
106
|
if (compactFlag) {
|
112
107
|
const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
|
@@ -116,24 +111,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
116
111
|
sketch->couponUpdate(coupon);
|
117
112
|
}
|
118
113
|
} else {
|
119
|
-
|
120
|
-
const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
|
121
|
-
sketch->lgCouponArrInts = lgArrInts;
|
122
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
123
|
-
sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
|
114
|
+
sketch->coupons.resize(1 << lgArrInts);
|
124
115
|
sketch->couponCount = couponCount;
|
125
116
|
// only need to read valid coupons, unlike in stream case
|
126
|
-
std::memcpy(sketch->
|
117
|
+
std::memcpy(sketch->coupons.data(),
|
127
118
|
data + HllUtil<A>::HASH_SET_INT_ARR_START,
|
128
119
|
couponCount * sizeof(int));
|
129
|
-
intAlloc().deallocate(oldArr, oldArrLen);
|
130
120
|
}
|
131
121
|
|
132
122
|
return sketch;
|
133
123
|
}
|
134
124
|
|
135
125
|
template<typename A>
|
136
|
-
CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
126
|
+
CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) {
|
137
127
|
uint8_t listHeader[8];
|
138
128
|
is.read((char*)listHeader, 8 * sizeof(uint8_t));
|
139
129
|
|
@@ -149,7 +139,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
149
139
|
|
150
140
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
|
151
141
|
if (mode != SET) {
|
152
|
-
throw std::invalid_argument("Calling set
|
142
|
+
throw std::invalid_argument("Calling set constructor with non-set mode data");
|
153
143
|
}
|
154
144
|
|
155
145
|
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
|
@@ -168,7 +158,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
168
158
|
lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
|
169
159
|
}
|
170
160
|
|
171
|
-
|
161
|
+
ChsAlloc chsa(allocator);
|
162
|
+
CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
|
172
163
|
typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
|
173
164
|
coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
|
174
165
|
|
@@ -181,13 +172,10 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
181
172
|
sketch->couponUpdate(coupon);
|
182
173
|
}
|
183
174
|
} else {
|
184
|
-
|
185
|
-
intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
|
186
|
-
sketch->lgCouponArrInts = lgArrInts;
|
187
|
-
sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
|
175
|
+
sketch->coupons.resize(1 << lgArrInts);
|
188
176
|
sketch->couponCount = couponCount;
|
189
177
|
// for stream processing, read entire list so read pointer ends up set correctly
|
190
|
-
is.read((char*)sketch->
|
178
|
+
is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int));
|
191
179
|
}
|
192
180
|
|
193
181
|
if (!is.good())
|
@@ -198,21 +186,24 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
198
186
|
|
199
187
|
template<typename A>
|
200
188
|
CouponHashSet<A>* CouponHashSet<A>::copy() const {
|
201
|
-
|
189
|
+
ChsAlloc chsa(this->coupons.get_allocator());
|
190
|
+
return new (chsa.allocate(1)) CouponHashSet<A>(*this);
|
202
191
|
}
|
203
192
|
|
204
193
|
template<typename A>
|
205
194
|
CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
|
206
|
-
|
195
|
+
ChsAlloc chsa(this->coupons.get_allocator());
|
196
|
+
return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType);
|
207
197
|
}
|
208
198
|
|
209
199
|
template<typename A>
|
210
200
|
HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
|
211
|
-
const
|
201
|
+
const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
|
202
|
+
const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon);
|
212
203
|
if (index >= 0) {
|
213
204
|
return this; // found duplicate, ignore
|
214
205
|
}
|
215
|
-
this->
|
206
|
+
this->coupons[~index] = coupon; // found empty
|
216
207
|
++this->couponCount;
|
217
208
|
if (checkGrowOrPromote()) {
|
218
209
|
return this->promoteHeapListOrSetToHll(*this);
|
@@ -232,39 +223,34 @@ int CouponHashSet<A>::getPreInts() const {
|
|
232
223
|
|
233
224
|
template<typename A>
|
234
225
|
bool CouponHashSet<A>::checkGrowOrPromote() {
|
235
|
-
if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER *
|
236
|
-
|
226
|
+
if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) {
|
227
|
+
const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
|
228
|
+
if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
|
237
229
|
return true; // promote to HLL
|
238
230
|
}
|
239
|
-
|
240
|
-
growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
|
231
|
+
growHashSet(lgCouponArrInts + 1);
|
241
232
|
}
|
242
233
|
return false;
|
243
234
|
}
|
244
235
|
|
245
236
|
template<typename A>
|
246
|
-
void CouponHashSet<A>::growHashSet(
|
237
|
+
void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
|
247
238
|
const int tgtLen = 1 << tgtLgCoupArrSize;
|
248
|
-
|
249
|
-
int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
|
250
|
-
std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
|
239
|
+
vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator());
|
251
240
|
|
252
|
-
const int srcLen =
|
241
|
+
const int srcLen = this->coupons.size();
|
253
242
|
for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
|
254
|
-
const int fetched = this->
|
243
|
+
const int fetched = this->coupons[i];
|
255
244
|
if (fetched != HllUtil<A>::EMPTY) {
|
256
|
-
const int idx = find<A>(
|
245
|
+
const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
|
257
246
|
if (idx < 0) { // found EMPTY
|
258
|
-
|
247
|
+
coupons_new[~idx] = fetched; // insert
|
259
248
|
continue;
|
260
249
|
}
|
261
250
|
throw std::runtime_error("Error: Found duplicate coupon");
|
262
251
|
}
|
263
252
|
}
|
264
|
-
|
265
|
-
intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
|
266
|
-
this->couponIntArr = tgtCouponIntArr;
|
267
|
-
this->lgCouponArrInts = tgtLgCoupArrSize;
|
253
|
+
this->coupons = std::move(coupons_new);
|
268
254
|
}
|
269
255
|
|
270
256
|
template<typename A>
|