datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -22,13 +22,15 @@
|
|
|
22
22
|
|
|
23
23
|
#include "frequent_items_sketch.hpp"
|
|
24
24
|
#include "test_type.hpp"
|
|
25
|
+
#include "test_allocator.hpp"
|
|
25
26
|
|
|
26
27
|
namespace datasketches {
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
using frequent_test_type_sketch = frequent_items_sketch<test_type, float, test_type_hash, test_type_equal, test_type_serde, test_allocator<test_type>>;
|
|
30
|
+
using alloc = test_allocator<test_type>;
|
|
29
31
|
|
|
30
32
|
TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
|
|
31
|
-
frequent_test_type_sketch sketch(3);
|
|
33
|
+
frequent_test_type_sketch sketch(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
|
32
34
|
sketch.update(1, 10); // should survive the purge
|
|
33
35
|
sketch.update(2);
|
|
34
36
|
sketch.update(3);
|
|
@@ -41,35 +43,36 @@ TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
|
|
|
41
43
|
REQUIRE_FALSE(sketch.is_empty());
|
|
42
44
|
REQUIRE(sketch.get_total_weight() == 17);
|
|
43
45
|
REQUIRE(sketch.get_estimate(1) == 10);
|
|
44
|
-
//std::cerr << "num active: " << sketch.get_num_active_items() << std::endl;
|
|
45
46
|
|
|
46
|
-
//std::cerr << "get frequent items" << std::endl;
|
|
47
47
|
auto items = sketch.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES);
|
|
48
48
|
REQUIRE(items.size() == 1); // only 1 item should be above threshold
|
|
49
49
|
REQUIRE(items[0].get_item().get_value() == 1);
|
|
50
50
|
REQUIRE(items[0].get_estimate() == 10);
|
|
51
51
|
|
|
52
52
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
53
|
-
//std::cerr << "serialize" << std::endl;
|
|
54
53
|
sketch.serialize(s);
|
|
55
|
-
|
|
56
|
-
auto sketch2 = frequent_test_type_sketch::deserialize(s);
|
|
54
|
+
auto sketch2 = frequent_test_type_sketch::deserialize(s, alloc(0));
|
|
57
55
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
58
56
|
REQUIRE(sketch2.get_total_weight() == 17);
|
|
59
57
|
REQUIRE(sketch2.get_estimate(1) == 10);
|
|
60
58
|
REQUIRE(sketch.get_num_active_items() == sketch2.get_num_active_items());
|
|
61
59
|
REQUIRE(sketch.get_maximum_error() == sketch2.get_maximum_error());
|
|
62
|
-
//std::cerr << "end" << std::endl;
|
|
63
60
|
|
|
64
|
-
|
|
61
|
+
auto bytes = sketch.serialize();
|
|
62
|
+
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), 0);
|
|
63
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
|
64
|
+
REQUIRE(sketch3.get_total_weight() == 17);
|
|
65
|
+
REQUIRE(sketch3.get_estimate(1) == 10);
|
|
66
|
+
REQUIRE(sketch.get_num_active_items() == sketch3.get_num_active_items());
|
|
67
|
+
REQUIRE(sketch.get_maximum_error() == sketch3.get_maximum_error());
|
|
65
68
|
}
|
|
66
69
|
|
|
67
70
|
// this is to see the debug print from test_type if enabled there to make sure items are moved
|
|
68
71
|
TEST_CASE("frequent items: moving merge", "[frequent_items_sketch]") {
|
|
69
|
-
frequent_test_type_sketch sketch1(3);
|
|
72
|
+
frequent_test_type_sketch sketch1(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
|
70
73
|
sketch1.update(1);
|
|
71
74
|
|
|
72
|
-
frequent_test_type_sketch sketch2(3);
|
|
75
|
+
frequent_test_type_sketch sketch2(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
|
73
76
|
sketch2.update(2);
|
|
74
77
|
|
|
75
78
|
sketch2.merge(std::move(sketch1));
|
|
@@ -77,7 +80,7 @@ TEST_CASE("frequent items: moving merge", "[frequent_items_sketch]") {
|
|
|
77
80
|
}
|
|
78
81
|
|
|
79
82
|
TEST_CASE("frequent items: negative weight", "[frequent_items_sketch]") {
|
|
80
|
-
frequent_test_type_sketch sketch(3);
|
|
83
|
+
frequent_test_type_sketch sketch(3, frequent_test_type_sketch::LG_MIN_MAP_SIZE, 0);
|
|
81
84
|
REQUIRE_THROWS_AS(sketch.update(1, -1), std::invalid_argument);
|
|
82
85
|
}
|
|
83
86
|
|
|
@@ -24,20 +24,20 @@
|
|
|
24
24
|
namespace datasketches {
|
|
25
25
|
|
|
26
26
|
TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
|
|
27
|
-
reverse_purge_hash_map<int> map(3, 3);
|
|
27
|
+
reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
|
|
28
28
|
REQUIRE(map.get_num_active() == 0);
|
|
29
29
|
REQUIRE(map.get_lg_cur_size() == 3); // static_cast<uint8_t>(3)
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
|
|
33
|
-
reverse_purge_hash_map<int> map(3, 3);
|
|
33
|
+
reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
|
|
34
34
|
map.adjust_or_insert(1, 1);
|
|
35
35
|
REQUIRE(map.get_num_active() == 1);
|
|
36
36
|
REQUIRE(map.get(1) == 1);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
|
|
40
|
-
reverse_purge_hash_map<int> map(3, 4);
|
|
40
|
+
reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
|
|
41
41
|
for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
|
|
42
42
|
int sum = 0;
|
|
43
43
|
for (auto &it: map) sum += it.second;
|
|
@@ -35,6 +35,7 @@ target_compile_features(hll INTERFACE cxx_std_11)
|
|
|
35
35
|
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
|
|
36
36
|
set(hll_HEADERS "")
|
|
37
37
|
list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
|
|
38
|
+
list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
|
|
38
39
|
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
|
|
39
40
|
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
|
|
40
41
|
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
|
|
@@ -60,6 +61,7 @@ install(FILES ${hll_HEADERS}
|
|
|
60
61
|
target_sources(hll
|
|
61
62
|
INTERFACE
|
|
62
63
|
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
|
|
64
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
|
|
63
65
|
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
|
|
64
66
|
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
|
|
65
67
|
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
|
|
@@ -71,6 +73,7 @@ target_sources(hll
|
|
|
71
73
|
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
|
|
72
74
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
|
|
73
75
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
|
|
76
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
|
|
74
77
|
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
|
|
75
78
|
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
|
|
76
79
|
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
|
|
@@ -26,42 +26,28 @@
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
28
|
template<typename A>
|
|
29
|
-
AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
auxIntArr = intAlloc().allocate(numItems);
|
|
36
|
-
std::fill(auxIntArr, auxIntArr + numItems, 0);
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
template<typename A>
|
|
40
|
-
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
|
|
41
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
|
|
42
|
-
}
|
|
29
|
+
AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
|
|
30
|
+
lgConfigK(lgConfigK),
|
|
31
|
+
lgAuxArrInts(lgAuxArrInts),
|
|
32
|
+
auxCount(0),
|
|
33
|
+
entries(1 << lgAuxArrInts, 0, allocator)
|
|
34
|
+
{}
|
|
43
35
|
|
|
44
36
|
template<typename A>
|
|
45
|
-
AuxHashMap<A>::
|
|
46
|
-
|
|
47
|
-
lgAuxArrInts(that.lgAuxArrInts),
|
|
48
|
-
auxCount(that.auxCount) {
|
|
49
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
50
|
-
const int numItems = 1 << lgAuxArrInts;
|
|
51
|
-
auxIntArr = intAlloc().allocate(numItems);
|
|
52
|
-
std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
|
|
37
|
+
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
|
|
38
|
+
return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
|
|
53
39
|
}
|
|
54
40
|
|
|
55
41
|
template<typename A>
|
|
56
42
|
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
|
|
57
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
|
|
43
|
+
return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
|
|
58
44
|
}
|
|
59
45
|
|
|
60
46
|
template<typename A>
|
|
61
47
|
AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
62
48
|
int lgConfigK,
|
|
63
49
|
int auxCount, int lgAuxArrInts,
|
|
64
|
-
bool srcCompact) {
|
|
50
|
+
bool srcCompact, const A& allocator) {
|
|
65
51
|
int lgArrInts = lgAuxArrInts;
|
|
66
52
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
67
53
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
@@ -77,7 +63,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
|
77
63
|
if (len < auxCount * sizeof(int)) {
|
|
78
64
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
79
65
|
}
|
|
80
|
-
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
66
|
+
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
|
81
67
|
for (int i = 0; i < auxCount; ++i) {
|
|
82
68
|
int pair = auxPtr[i];
|
|
83
69
|
int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
@@ -89,7 +75,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
|
89
75
|
if (len < itemsToRead * sizeof(int)) {
|
|
90
76
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
91
77
|
}
|
|
92
|
-
auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
78
|
+
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
|
93
79
|
for (int i = 0; i < itemsToRead; ++i) {
|
|
94
80
|
int pair = auxPtr[i];
|
|
95
81
|
if (pair == HllUtil<A>::EMPTY) { continue; }
|
|
@@ -110,7 +96,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
|
110
96
|
template<typename A>
|
|
111
97
|
AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
112
98
|
const int auxCount, const int lgAuxArrInts,
|
|
113
|
-
const bool srcCompact) {
|
|
99
|
+
const bool srcCompact, const A& allocator) {
|
|
114
100
|
int lgArrInts = lgAuxArrInts;
|
|
115
101
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
116
102
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
@@ -118,7 +104,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
|
118
104
|
lgArrInts = lgAuxArrInts;
|
|
119
105
|
}
|
|
120
106
|
|
|
121
|
-
AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
|
|
107
|
+
AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
|
122
108
|
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
|
123
109
|
aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
|
|
124
110
|
|
|
@@ -152,24 +138,18 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
|
152
138
|
return aux_ptr.release();
|
|
153
139
|
}
|
|
154
140
|
|
|
155
|
-
template<typename A>
|
|
156
|
-
AuxHashMap<A>::~AuxHashMap<A>() {
|
|
157
|
-
// should be no way to have an object without a valid array
|
|
158
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
159
|
-
intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
|
|
160
|
-
}
|
|
161
|
-
|
|
162
141
|
template<typename A>
|
|
163
142
|
std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
|
|
164
143
|
return [](AuxHashMap<A>* ptr) {
|
|
144
|
+
ahmAlloc alloc(ptr->entries.get_allocator());
|
|
165
145
|
ptr->~AuxHashMap();
|
|
166
|
-
|
|
146
|
+
alloc.deallocate(ptr, 1);
|
|
167
147
|
};
|
|
168
148
|
}
|
|
169
149
|
|
|
170
150
|
template<typename A>
|
|
171
151
|
AuxHashMap<A>* AuxHashMap<A>::copy() const {
|
|
172
|
-
return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
|
|
152
|
+
return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
|
|
173
153
|
}
|
|
174
154
|
|
|
175
155
|
template<typename A>
|
|
@@ -179,7 +159,7 @@ int AuxHashMap<A>::getAuxCount() const {
|
|
|
179
159
|
|
|
180
160
|
template<typename A>
|
|
181
161
|
int* AuxHashMap<A>::getAuxIntArr(){
|
|
182
|
-
return
|
|
162
|
+
return entries.data();
|
|
183
163
|
}
|
|
184
164
|
|
|
185
165
|
template<typename A>
|
|
@@ -199,7 +179,7 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const {
|
|
|
199
179
|
|
|
200
180
|
template<typename A>
|
|
201
181
|
void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
|
202
|
-
const int index = find(
|
|
182
|
+
const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
203
183
|
const int entry_pair = HllUtil<A>::pair(slotNo, value);
|
|
204
184
|
if (index >= 0) {
|
|
205
185
|
throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
|
|
@@ -207,16 +187,16 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
|
|
207
187
|
}
|
|
208
188
|
|
|
209
189
|
// found empty entry
|
|
210
|
-
|
|
190
|
+
entries[~index] = entry_pair;
|
|
211
191
|
++auxCount;
|
|
212
192
|
checkGrow();
|
|
213
193
|
}
|
|
214
194
|
|
|
215
195
|
template<typename A>
|
|
216
196
|
int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
|
217
|
-
const int index = find(
|
|
197
|
+
const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
218
198
|
if (index >= 0) {
|
|
219
|
-
return HllUtil<A>::getValue(
|
|
199
|
+
return HllUtil<A>::getValue(entries[index]);
|
|
220
200
|
}
|
|
221
201
|
|
|
222
202
|
throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
|
|
@@ -224,9 +204,9 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
|
|
224
204
|
|
|
225
205
|
template<typename A>
|
|
226
206
|
void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
|
|
227
|
-
const int idx = find(
|
|
207
|
+
const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
228
208
|
if (idx >= 0) {
|
|
229
|
-
|
|
209
|
+
entries[idx] = HllUtil<A>::pair(slotNo, value);
|
|
230
210
|
return;
|
|
231
211
|
}
|
|
232
212
|
|
|
@@ -243,23 +223,18 @@ void AuxHashMap<A>::checkGrow() {
|
|
|
243
223
|
|
|
244
224
|
template<typename A>
|
|
245
225
|
void AuxHashMap<A>::growAuxSpace() {
|
|
246
|
-
int* oldArray = auxIntArr;
|
|
247
|
-
const int oldArrLen = 1 << lgAuxArrInts;
|
|
248
226
|
const int configKmask = (1 << lgConfigK) - 1;
|
|
249
227
|
const int newArrLen = 1 << ++lgAuxArrInts;
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
for (int i = 0; i < oldArrLen; ++i) {
|
|
254
|
-
const int fetched = oldArray[i];
|
|
228
|
+
vector_int entries_new(newArrLen, 0, entries.get_allocator());
|
|
229
|
+
for (size_t i = 0; i < entries.size(); ++i) {
|
|
230
|
+
const int fetched = entries[i];
|
|
255
231
|
if (fetched != HllUtil<A>::EMPTY) {
|
|
256
232
|
// find empty in new array
|
|
257
|
-
const int idx = find(
|
|
258
|
-
|
|
233
|
+
const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
|
|
234
|
+
entries_new[~idx] = fetched;
|
|
259
235
|
}
|
|
260
236
|
}
|
|
261
|
-
|
|
262
|
-
intAlloc().deallocate(oldArray, oldArrLen);
|
|
237
|
+
entries = std::move(entries_new);
|
|
263
238
|
}
|
|
264
239
|
|
|
265
240
|
//Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
|
|
@@ -290,12 +265,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
|
|
|
290
265
|
|
|
291
266
|
template<typename A>
|
|
292
267
|
coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
|
|
293
|
-
return coupon_iterator<A>(
|
|
268
|
+
return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
|
|
294
269
|
}
|
|
295
270
|
|
|
296
271
|
template<typename A>
|
|
297
272
|
coupon_iterator<A> AuxHashMap<A>::end() const {
|
|
298
|
-
return coupon_iterator<A>(
|
|
273
|
+
return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
|
|
299
274
|
}
|
|
300
275
|
|
|
301
276
|
}
|
|
@@ -28,22 +28,21 @@
|
|
|
28
28
|
|
|
29
29
|
namespace datasketches {
|
|
30
30
|
|
|
31
|
-
template<typename A
|
|
31
|
+
template<typename A>
|
|
32
32
|
class AuxHashMap final {
|
|
33
33
|
public:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK);
|
|
34
|
+
AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
|
|
35
|
+
static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
|
|
37
36
|
static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
|
|
38
37
|
|
|
39
38
|
static AuxHashMap* deserialize(const void* bytes, size_t len,
|
|
40
39
|
int lgConfigK,
|
|
41
40
|
int auxCount, int lgAuxArrInts,
|
|
42
|
-
bool srcCompact);
|
|
41
|
+
bool srcCompact, const A& allocator);
|
|
43
42
|
static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
|
|
44
43
|
int auxCount, int lgAuxArrInts,
|
|
45
|
-
bool srcCompact);
|
|
46
|
-
virtual ~AuxHashMap();
|
|
44
|
+
bool srcCompact, const A& allocator);
|
|
45
|
+
virtual ~AuxHashMap() = default;
|
|
47
46
|
static std::function<void(AuxHashMap<A>*)> make_deleter();
|
|
48
47
|
|
|
49
48
|
AuxHashMap* copy() const;
|
|
@@ -64,6 +63,8 @@ class AuxHashMap final {
|
|
|
64
63
|
private:
|
|
65
64
|
typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
|
|
66
65
|
|
|
66
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
|
67
|
+
|
|
67
68
|
// static so it can be used when resizing
|
|
68
69
|
static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
|
|
69
70
|
|
|
@@ -73,7 +74,7 @@ class AuxHashMap final {
|
|
|
73
74
|
const int lgConfigK;
|
|
74
75
|
int lgAuxArrInts;
|
|
75
76
|
int auxCount;
|
|
76
|
-
|
|
77
|
+
vector_int entries;
|
|
77
78
|
};
|
|
78
79
|
|
|
79
80
|
}
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
|
-
template<typename A = std::allocator<
|
|
27
|
+
template<typename A = std::allocator<uint8_t>>
|
|
28
28
|
class CompositeInterpolationXTable {
|
|
29
29
|
public:
|
|
30
30
|
static int get_y_stride(int logK);
|
|
@@ -37,4 +37,4 @@ class CompositeInterpolationXTable {
|
|
|
37
37
|
|
|
38
38
|
#include "CompositeInterpolationXTable-internal.hpp"
|
|
39
39
|
|
|
40
|
-
#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
|
|
40
|
+
#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
|
|
@@ -31,8 +31,8 @@ template<typename A>
|
|
|
31
31
|
static int find(const int* array, const int lgArrInts, const int coupon);
|
|
32
32
|
|
|
33
33
|
template<typename A>
|
|
34
|
-
CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
|
|
35
|
-
: CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
|
|
34
|
+
CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator)
|
|
35
|
+
: CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
|
|
36
36
|
{
|
|
37
37
|
if (lgConfigK <= 7) {
|
|
38
38
|
throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
|
|
@@ -40,28 +40,22 @@ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHl
|
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
template<typename A>
|
|
44
|
-
CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
|
|
45
|
-
: CouponList<A>(that) {}
|
|
46
|
-
|
|
47
43
|
template<typename A>
|
|
48
44
|
CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
|
|
49
45
|
: CouponList<A>(that, tgtHllType) {}
|
|
50
46
|
|
|
51
|
-
template<typename A>
|
|
52
|
-
CouponHashSet<A>::~CouponHashSet() {}
|
|
53
|
-
|
|
54
47
|
template<typename A>
|
|
55
48
|
std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
|
|
56
49
|
return [](HllSketchImpl<A>* ptr) {
|
|
57
50
|
CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
|
|
51
|
+
ChsAlloc chsa(chs->getAllocator());
|
|
58
52
|
chs->~CouponHashSet();
|
|
59
|
-
|
|
53
|
+
chsa.deallocate(chs, 1);
|
|
60
54
|
};
|
|
61
55
|
}
|
|
62
56
|
|
|
63
57
|
template<typename A>
|
|
64
|
-
CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
58
|
+
CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) {
|
|
65
59
|
if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded
|
|
66
60
|
throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
|
|
67
61
|
}
|
|
@@ -79,7 +73,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
|
79
73
|
|
|
80
74
|
const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
|
|
81
75
|
if (mode != SET) {
|
|
82
|
-
throw std::invalid_argument("Calling set
|
|
76
|
+
throw std::invalid_argument("Calling set constructor with non-set mode data");
|
|
83
77
|
}
|
|
84
78
|
|
|
85
79
|
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
|
|
@@ -106,7 +100,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
|
106
100
|
+ ", found: " + std::to_string(len));
|
|
107
101
|
}
|
|
108
102
|
|
|
109
|
-
|
|
103
|
+
ChsAlloc chsa(allocator);
|
|
104
|
+
CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
|
|
110
105
|
|
|
111
106
|
if (compactFlag) {
|
|
112
107
|
const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
|
|
@@ -116,24 +111,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
|
|
|
116
111
|
sketch->couponUpdate(coupon);
|
|
117
112
|
}
|
|
118
113
|
} else {
|
|
119
|
-
|
|
120
|
-
const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
|
|
121
|
-
sketch->lgCouponArrInts = lgArrInts;
|
|
122
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
123
|
-
sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
|
|
114
|
+
sketch->coupons.resize(1 << lgArrInts);
|
|
124
115
|
sketch->couponCount = couponCount;
|
|
125
116
|
// only need to read valid coupons, unlike in stream case
|
|
126
|
-
std::memcpy(sketch->
|
|
117
|
+
std::memcpy(sketch->coupons.data(),
|
|
127
118
|
data + HllUtil<A>::HASH_SET_INT_ARR_START,
|
|
128
119
|
couponCount * sizeof(int));
|
|
129
|
-
intAlloc().deallocate(oldArr, oldArrLen);
|
|
130
120
|
}
|
|
131
121
|
|
|
132
122
|
return sketch;
|
|
133
123
|
}
|
|
134
124
|
|
|
135
125
|
template<typename A>
|
|
136
|
-
CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
126
|
+
CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) {
|
|
137
127
|
uint8_t listHeader[8];
|
|
138
128
|
is.read((char*)listHeader, 8 * sizeof(uint8_t));
|
|
139
129
|
|
|
@@ -149,7 +139,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
|
149
139
|
|
|
150
140
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
|
|
151
141
|
if (mode != SET) {
|
|
152
|
-
throw std::invalid_argument("Calling set
|
|
142
|
+
throw std::invalid_argument("Calling set constructor with non-set mode data");
|
|
153
143
|
}
|
|
154
144
|
|
|
155
145
|
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
|
|
@@ -168,7 +158,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
|
168
158
|
lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
|
|
169
159
|
}
|
|
170
160
|
|
|
171
|
-
|
|
161
|
+
ChsAlloc chsa(allocator);
|
|
162
|
+
CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
|
|
172
163
|
typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
|
|
173
164
|
coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
|
|
174
165
|
|
|
@@ -181,13 +172,10 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
|
181
172
|
sketch->couponUpdate(coupon);
|
|
182
173
|
}
|
|
183
174
|
} else {
|
|
184
|
-
|
|
185
|
-
intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
|
|
186
|
-
sketch->lgCouponArrInts = lgArrInts;
|
|
187
|
-
sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
|
|
175
|
+
sketch->coupons.resize(1 << lgArrInts);
|
|
188
176
|
sketch->couponCount = couponCount;
|
|
189
177
|
// for stream processing, read entire list so read pointer ends up set correctly
|
|
190
|
-
is.read((char*)sketch->
|
|
178
|
+
is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int));
|
|
191
179
|
}
|
|
192
180
|
|
|
193
181
|
if (!is.good())
|
|
@@ -198,21 +186,24 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
|
|
|
198
186
|
|
|
199
187
|
template<typename A>
|
|
200
188
|
CouponHashSet<A>* CouponHashSet<A>::copy() const {
|
|
201
|
-
|
|
189
|
+
ChsAlloc chsa(this->coupons.get_allocator());
|
|
190
|
+
return new (chsa.allocate(1)) CouponHashSet<A>(*this);
|
|
202
191
|
}
|
|
203
192
|
|
|
204
193
|
template<typename A>
|
|
205
194
|
CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
|
|
206
|
-
|
|
195
|
+
ChsAlloc chsa(this->coupons.get_allocator());
|
|
196
|
+
return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType);
|
|
207
197
|
}
|
|
208
198
|
|
|
209
199
|
template<typename A>
|
|
210
200
|
HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
|
|
211
|
-
const
|
|
201
|
+
const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
|
|
202
|
+
const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon);
|
|
212
203
|
if (index >= 0) {
|
|
213
204
|
return this; // found duplicate, ignore
|
|
214
205
|
}
|
|
215
|
-
this->
|
|
206
|
+
this->coupons[~index] = coupon; // found empty
|
|
216
207
|
++this->couponCount;
|
|
217
208
|
if (checkGrowOrPromote()) {
|
|
218
209
|
return this->promoteHeapListOrSetToHll(*this);
|
|
@@ -232,39 +223,34 @@ int CouponHashSet<A>::getPreInts() const {
|
|
|
232
223
|
|
|
233
224
|
template<typename A>
|
|
234
225
|
bool CouponHashSet<A>::checkGrowOrPromote() {
|
|
235
|
-
if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER *
|
|
236
|
-
|
|
226
|
+
if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) {
|
|
227
|
+
const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
|
|
228
|
+
if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
|
|
237
229
|
return true; // promote to HLL
|
|
238
230
|
}
|
|
239
|
-
|
|
240
|
-
growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
|
|
231
|
+
growHashSet(lgCouponArrInts + 1);
|
|
241
232
|
}
|
|
242
233
|
return false;
|
|
243
234
|
}
|
|
244
235
|
|
|
245
236
|
template<typename A>
|
|
246
|
-
void CouponHashSet<A>::growHashSet(
|
|
237
|
+
void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
|
|
247
238
|
const int tgtLen = 1 << tgtLgCoupArrSize;
|
|
248
|
-
|
|
249
|
-
int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
|
|
250
|
-
std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
|
|
239
|
+
vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator());
|
|
251
240
|
|
|
252
|
-
const int srcLen =
|
|
241
|
+
const int srcLen = this->coupons.size();
|
|
253
242
|
for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
|
|
254
|
-
const int fetched = this->
|
|
243
|
+
const int fetched = this->coupons[i];
|
|
255
244
|
if (fetched != HllUtil<A>::EMPTY) {
|
|
256
|
-
const int idx = find<A>(
|
|
245
|
+
const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
|
|
257
246
|
if (idx < 0) { // found EMPTY
|
|
258
|
-
|
|
247
|
+
coupons_new[~idx] = fetched; // insert
|
|
259
248
|
continue;
|
|
260
249
|
}
|
|
261
250
|
throw std::runtime_error("Error: Found duplicate coupon");
|
|
262
251
|
}
|
|
263
252
|
}
|
|
264
|
-
|
|
265
|
-
intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
|
|
266
|
-
this->couponIntArr = tgtCouponIntArr;
|
|
267
|
-
this->lgCouponArrInts = tgtLgCoupArrSize;
|
|
253
|
+
this->coupons = std::move(coupons_new);
|
|
268
254
|
}
|
|
269
255
|
|
|
270
256
|
template<typename A>
|