datasketches 0.1.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
- data/vendor/datasketches-cpp/python/README.md +52 -49
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
- data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
- data/vendor/datasketches-cpp/setup.py +11 -6
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
- metadata +51 -36
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -30,13 +30,14 @@
|
|
30
30
|
namespace datasketches {
|
31
31
|
|
32
32
|
template<typename T, typename C, typename S, typename A>
|
33
|
-
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k):
|
33
|
+
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
|
34
|
+
allocator_(allocator),
|
34
35
|
k_(k),
|
35
36
|
m_(DEFAULT_M),
|
36
37
|
min_k_(k),
|
37
38
|
n_(0),
|
38
39
|
num_levels_(1),
|
39
|
-
levels_(2),
|
40
|
+
levels_(2, 0, allocator),
|
40
41
|
items_(nullptr),
|
41
42
|
items_size_(k_),
|
42
43
|
min_value_(nullptr),
|
@@ -47,11 +48,12 @@ is_level_zero_sorted_(false)
|
|
47
48
|
throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
|
48
49
|
}
|
49
50
|
levels_[0] = levels_[1] = k;
|
50
|
-
items_ =
|
51
|
+
items_ = allocator_.allocate(items_size_);
|
51
52
|
}
|
52
53
|
|
53
54
|
template<typename T, typename C, typename S, typename A>
|
54
55
|
kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
|
56
|
+
allocator_(other.allocator_),
|
55
57
|
k_(other.k_),
|
56
58
|
m_(other.m_),
|
57
59
|
min_k_(other.min_k_),
|
@@ -64,14 +66,15 @@ min_value_(nullptr),
|
|
64
66
|
max_value_(nullptr),
|
65
67
|
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
66
68
|
{
|
67
|
-
items_ =
|
69
|
+
items_ = allocator_.allocate(items_size_);
|
68
70
|
std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
|
69
|
-
if (other.min_value_ != nullptr) min_value_ = new (
|
70
|
-
if (other.max_value_ != nullptr) max_value_ = new (
|
71
|
+
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
72
|
+
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
71
73
|
}
|
72
74
|
|
73
75
|
template<typename T, typename C, typename S, typename A>
|
74
76
|
kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
|
77
|
+
allocator_(std::move(other.allocator_)),
|
75
78
|
k_(other.k_),
|
76
79
|
m_(other.m_),
|
77
80
|
min_k_(other.min_k_),
|
@@ -91,7 +94,8 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
|
|
91
94
|
|
92
95
|
template<typename T, typename C, typename S, typename A>
|
93
96
|
kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
|
94
|
-
kll_sketch copy(other);
|
97
|
+
kll_sketch<T, C, S, A> copy(other);
|
98
|
+
std::swap(allocator_, copy.allocator_);
|
95
99
|
std::swap(k_, copy.k_);
|
96
100
|
std::swap(m_, copy.m_);
|
97
101
|
std::swap(min_k_, copy.min_k_);
|
@@ -108,6 +112,7 @@ kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& othe
|
|
108
112
|
|
109
113
|
template<typename T, typename C, typename S, typename A>
|
110
114
|
kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
|
115
|
+
std::swap(allocator_, other.allocator_);
|
111
116
|
std::swap(k_, other.k_);
|
112
117
|
std::swap(m_, other.m_);
|
113
118
|
std::swap(min_k_, other.min_k_);
|
@@ -128,15 +133,15 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
|
|
128
133
|
const uint32_t begin = levels_[0];
|
129
134
|
const uint32_t end = levels_[num_levels_];
|
130
135
|
for (uint32_t i = begin; i < end; i++) items_[i].~T();
|
131
|
-
|
136
|
+
allocator_.deallocate(items_, items_size_);
|
132
137
|
}
|
133
138
|
if (min_value_ != nullptr) {
|
134
139
|
min_value_->~T();
|
135
|
-
|
140
|
+
allocator_.deallocate(min_value_, 1);
|
136
141
|
}
|
137
142
|
if (max_value_ != nullptr) {
|
138
143
|
max_value_->~T();
|
139
|
-
|
144
|
+
allocator_.deallocate(max_value_, 1);
|
140
145
|
}
|
141
146
|
}
|
142
147
|
|
@@ -159,8 +164,8 @@ void kll_sketch<T, C, S, A>::update(T&& value) {
|
|
159
164
|
template<typename T, typename C, typename S, typename A>
|
160
165
|
void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
|
161
166
|
if (is_empty()) {
|
162
|
-
min_value_ = new (
|
163
|
-
max_value_ = new (
|
167
|
+
min_value_ = new (allocator_.allocate(1)) T(value);
|
168
|
+
max_value_ = new (allocator_.allocate(1)) T(value);
|
164
169
|
} else {
|
165
170
|
if (C()(value, *min_value_)) *min_value_ = value;
|
166
171
|
if (C()(*max_value_, value)) *max_value_ = value;
|
@@ -182,8 +187,8 @@ void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
|
|
182
187
|
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
183
188
|
}
|
184
189
|
if (is_empty()) {
|
185
|
-
min_value_ = new (
|
186
|
-
max_value_ = new (
|
190
|
+
min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
191
|
+
max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
187
192
|
} else {
|
188
193
|
if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
|
189
194
|
if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
|
@@ -206,8 +211,8 @@ void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
|
|
206
211
|
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
207
212
|
}
|
208
213
|
if (is_empty()) {
|
209
|
-
min_value_ = new (
|
210
|
-
max_value_ = new (
|
214
|
+
min_value_ = new (allocator_.allocate(1)) T(std::move(*other.min_value_));
|
215
|
+
max_value_ = new (allocator_.allocate(1)) T(std::move(*other.max_value_));
|
211
216
|
} else {
|
212
217
|
if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
|
213
218
|
if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
|
@@ -228,6 +233,11 @@ bool kll_sketch<T, C, S, A>::is_empty() const {
|
|
228
233
|
return n_ == 0;
|
229
234
|
}
|
230
235
|
|
236
|
+
template<typename T, typename C, typename S, typename A>
|
237
|
+
uint16_t kll_sketch<T, C, S, A>::get_k() const {
|
238
|
+
return k_;
|
239
|
+
}
|
240
|
+
|
231
241
|
template<typename T, typename C, typename S, typename A>
|
232
242
|
uint64_t kll_sketch<T, C, S, A>::get_n() const {
|
233
243
|
return n_;
|
@@ -270,8 +280,7 @@ T kll_sketch<T, C, S, A>::get_quantile(double fraction) const {
|
|
270
280
|
|
271
281
|
template<typename T, typename C, typename S, typename A>
|
272
282
|
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
|
273
|
-
std::vector<T, A> quantiles;
|
274
|
-
quantiles.reserve(size);
|
283
|
+
std::vector<T, A> quantiles(allocator_);
|
275
284
|
if (is_empty()) return quantiles;
|
276
285
|
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
|
277
286
|
quantiles.reserve(size);
|
@@ -294,12 +303,12 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions,
|
|
294
303
|
}
|
295
304
|
|
296
305
|
template<typename T, typename C, typename S, typename A>
|
297
|
-
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(
|
298
|
-
if (is_empty()) return std::vector<T, A>();
|
306
|
+
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
|
307
|
+
if (is_empty()) return std::vector<T, A>(allocator_);
|
299
308
|
if (num == 0) {
|
300
309
|
throw std::invalid_argument("num must be > 0");
|
301
310
|
}
|
302
|
-
|
311
|
+
vector_d<A> fractions(num, 0, allocator_);
|
303
312
|
fractions[0] = 0.0;
|
304
313
|
for (size_t i = 1; i < num; i++) {
|
305
314
|
fractions[i] = static_cast<double>(i) / (num - 1);
|
@@ -371,36 +380,56 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
|
371
380
|
size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
|
372
381
|
size += S().size_of_item(*min_value_);
|
373
382
|
size += S().size_of_item(*max_value_);
|
374
|
-
for (auto
|
383
|
+
for (auto it: *this) size += S().size_of_item(it.first);
|
375
384
|
return size;
|
376
385
|
}
|
377
386
|
|
387
|
+
// implementation for fixed-size arithmetic types (integral and floating point)
|
388
|
+
template<typename T, typename C, typename S, typename A>
|
389
|
+
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
390
|
+
size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
|
391
|
+
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
|
392
|
+
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
|
393
|
+
// the last integer in the levels_ array is not serialized because it can be derived
|
394
|
+
return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * sizeof(TT);
|
395
|
+
}
|
396
|
+
|
397
|
+
// implementation for all other types
|
398
|
+
template<typename T, typename C, typename S, typename A>
|
399
|
+
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
400
|
+
size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
|
401
|
+
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
|
402
|
+
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
|
403
|
+
// the last integer in the levels_ array is not serialized because it can be derived
|
404
|
+
return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
|
405
|
+
}
|
406
|
+
|
378
407
|
template<typename T, typename C, typename S, typename A>
|
379
408
|
void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
|
380
409
|
const bool is_single_item = n_ == 1;
|
381
410
|
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
382
|
-
|
411
|
+
write(os, preamble_ints);
|
383
412
|
const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
|
384
|
-
|
413
|
+
write(os, serial_version);
|
385
414
|
const uint8_t family(FAMILY);
|
386
|
-
|
415
|
+
write(os, family);
|
387
416
|
const uint8_t flags_byte(
|
388
417
|
(is_empty() ? 1 << flags::IS_EMPTY : 0)
|
389
418
|
| (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
|
390
419
|
| (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
|
391
420
|
);
|
392
|
-
|
393
|
-
|
394
|
-
|
421
|
+
write(os, flags_byte);
|
422
|
+
write(os, k_);
|
423
|
+
write(os, m_);
|
395
424
|
const uint8_t unused = 0;
|
396
|
-
|
425
|
+
write(os, unused);
|
397
426
|
if (is_empty()) return;
|
398
427
|
if (!is_single_item) {
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
428
|
+
write(os, n_);
|
429
|
+
write(os, min_k_);
|
430
|
+
write(os, num_levels_);
|
431
|
+
write(os, unused);
|
432
|
+
write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
|
404
433
|
S().serialize(os, min_value_, 1);
|
405
434
|
S().serialize(os, max_value_, 1);
|
406
435
|
}
|
@@ -411,31 +440,30 @@ template<typename T, typename C, typename S, typename A>
|
|
411
440
|
vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const {
|
412
441
|
const bool is_single_item = n_ == 1;
|
413
442
|
const size_t size = header_size_bytes + get_serialized_size_bytes();
|
414
|
-
vector_u8<A> bytes(size);
|
443
|
+
vector_u8<A> bytes(size, 0, allocator_);
|
415
444
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
416
445
|
const uint8_t* end_ptr = ptr + size;
|
417
446
|
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
418
|
-
ptr += copy_to_mem(
|
447
|
+
ptr += copy_to_mem(preamble_ints, ptr);
|
419
448
|
const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
|
420
|
-
ptr += copy_to_mem(
|
449
|
+
ptr += copy_to_mem(serial_version, ptr);
|
421
450
|
const uint8_t family(FAMILY);
|
422
|
-
ptr += copy_to_mem(
|
451
|
+
ptr += copy_to_mem(family, ptr);
|
423
452
|
const uint8_t flags_byte(
|
424
453
|
(is_empty() ? 1 << flags::IS_EMPTY : 0)
|
425
454
|
| (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
|
426
455
|
| (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
|
427
456
|
);
|
428
|
-
ptr += copy_to_mem(
|
429
|
-
ptr += copy_to_mem(
|
430
|
-
ptr += copy_to_mem(
|
431
|
-
|
432
|
-
ptr += copy_to_mem(&unused, ptr, sizeof(unused));
|
457
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
458
|
+
ptr += copy_to_mem(k_, ptr);
|
459
|
+
ptr += copy_to_mem(m_, ptr);
|
460
|
+
ptr += sizeof(uint8_t); // unused
|
433
461
|
if (!is_empty()) {
|
434
462
|
if (!is_single_item) {
|
435
|
-
ptr += copy_to_mem(
|
436
|
-
ptr += copy_to_mem(
|
437
|
-
ptr += copy_to_mem(
|
438
|
-
ptr +=
|
463
|
+
ptr += copy_to_mem(n_, ptr);
|
464
|
+
ptr += copy_to_mem(min_k_, ptr);
|
465
|
+
ptr += copy_to_mem(num_levels_, ptr);
|
466
|
+
ptr += sizeof(uint8_t); // unused
|
439
467
|
ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
|
440
468
|
ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
|
441
469
|
ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
|
@@ -449,21 +477,14 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
|
|
449
477
|
}
|
450
478
|
|
451
479
|
template<typename T, typename C, typename S, typename A>
|
452
|
-
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
|
453
|
-
|
454
|
-
|
455
|
-
uint8_t
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
uint8_t
|
460
|
-
is.read((char*)&flags_byte, sizeof(flags_byte));
|
461
|
-
uint16_t k;
|
462
|
-
is.read((char*)&k, sizeof(k));
|
463
|
-
uint8_t m;
|
464
|
-
is.read((char*)&m, sizeof(m));
|
465
|
-
uint8_t unused;
|
466
|
-
is.read((char*)&unused, sizeof(unused));
|
480
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
|
481
|
+
const auto preamble_ints = read<uint8_t>(is);
|
482
|
+
const auto serial_version = read<uint8_t>(is);
|
483
|
+
const auto family_id = read<uint8_t>(is);
|
484
|
+
const auto flags_byte = read<uint8_t>(is);
|
485
|
+
const auto k = read<uint16_t>(is);
|
486
|
+
const auto m = read<uint8_t>(is);
|
487
|
+
read<uint8_t>(is); // skip unused byte
|
467
488
|
|
468
489
|
check_m(m);
|
469
490
|
check_preamble_ints(preamble_ints, flags_byte);
|
@@ -472,7 +493,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
|
|
472
493
|
|
473
494
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
474
495
|
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
475
|
-
if (is_empty) return kll_sketch(k);
|
496
|
+
if (is_empty) return kll_sketch(k, allocator);
|
476
497
|
|
477
498
|
uint64_t n;
|
478
499
|
uint16_t min_k;
|
@@ -483,79 +504,81 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
|
|
483
504
|
min_k = k;
|
484
505
|
num_levels = 1;
|
485
506
|
} else {
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
507
|
+
n = read<uint64_t>(is);
|
508
|
+
min_k = read<uint16_t>(is);
|
509
|
+
num_levels = read<uint8_t>(is);
|
510
|
+
read<uint8_t>(is); // skip unused byte
|
490
511
|
}
|
491
|
-
vector_u32<A> levels(num_levels + 1);
|
512
|
+
vector_u32<A> levels(num_levels + 1, 0, allocator);
|
492
513
|
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
493
514
|
if (is_single_item) {
|
494
515
|
levels[0] = capacity - 1;
|
495
516
|
} else {
|
496
517
|
// the last integer in levels_ is not serialized because it can be derived
|
497
|
-
|
518
|
+
read(is, levels.data(), sizeof(levels[0]) * num_levels);
|
498
519
|
}
|
499
520
|
levels[num_levels] = capacity;
|
500
|
-
|
501
|
-
|
502
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
503
|
-
std::unique_ptr<T,
|
504
|
-
std::unique_ptr<T, item_deleter>
|
521
|
+
A alloc(allocator);
|
522
|
+
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
523
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
|
524
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
|
525
|
+
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
|
526
|
+
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
|
505
527
|
if (!is_single_item) {
|
506
528
|
S().deserialize(is, min_value_buffer.get(), 1);
|
507
529
|
// serde call did not throw, repackage with destrtuctor
|
508
|
-
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
530
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
509
531
|
S().deserialize(is, max_value_buffer.get(), 1);
|
510
532
|
// serde call did not throw, repackage with destrtuctor
|
511
|
-
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
533
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
512
534
|
}
|
513
|
-
auto items_buffer_deleter = [capacity](T* ptr) {
|
514
|
-
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(
|
535
|
+
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
536
|
+
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
515
537
|
const auto num_items = levels[num_levels] - levels[0];
|
516
538
|
S().deserialize(is, &items_buffer.get()[levels[0]], num_items);
|
517
539
|
// serde call did not throw, repackage with destructors
|
518
|
-
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
|
540
|
+
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
|
519
541
|
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
520
542
|
if (is_single_item) {
|
521
543
|
new (min_value_buffer.get()) T(items.get()[levels[0]]);
|
522
544
|
// copy did not throw, repackage with destructor
|
523
|
-
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
545
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
524
546
|
new (max_value_buffer.get()) T(items.get()[levels[0]]);
|
525
547
|
// copy did not throw, repackage with destructor
|
526
|
-
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
548
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
527
549
|
}
|
528
|
-
if (!is.good())
|
550
|
+
if (!is.good())
|
551
|
+
throw std::runtime_error("error reading from std::istream");
|
529
552
|
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
530
553
|
std::move(min_value), std::move(max_value), is_level_zero_sorted);
|
531
554
|
}
|
532
555
|
|
533
556
|
template<typename T, typename C, typename S, typename A>
|
534
|
-
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size) {
|
557
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
535
558
|
ensure_minimum_memory(size, 8);
|
536
559
|
const char* ptr = static_cast<const char*>(bytes);
|
537
560
|
uint8_t preamble_ints;
|
538
|
-
ptr += copy_from_mem(ptr,
|
561
|
+
ptr += copy_from_mem(ptr, preamble_ints);
|
539
562
|
uint8_t serial_version;
|
540
|
-
ptr += copy_from_mem(ptr,
|
563
|
+
ptr += copy_from_mem(ptr, serial_version);
|
541
564
|
uint8_t family_id;
|
542
|
-
ptr += copy_from_mem(ptr,
|
565
|
+
ptr += copy_from_mem(ptr, family_id);
|
543
566
|
uint8_t flags_byte;
|
544
|
-
ptr += copy_from_mem(ptr,
|
567
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
545
568
|
uint16_t k;
|
546
|
-
ptr += copy_from_mem(ptr,
|
569
|
+
ptr += copy_from_mem(ptr, k);
|
547
570
|
uint8_t m;
|
548
|
-
ptr += copy_from_mem(ptr,
|
549
|
-
ptr
|
571
|
+
ptr += copy_from_mem(ptr, m);
|
572
|
+
ptr += sizeof(uint8_t); // skip unused byte
|
550
573
|
|
551
574
|
check_m(m);
|
552
575
|
check_preamble_ints(preamble_ints, flags_byte);
|
553
576
|
check_serial_version(serial_version);
|
554
577
|
check_family_id(family_id);
|
555
|
-
ensure_minimum_memory(size,
|
578
|
+
ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
|
556
579
|
|
557
580
|
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
558
|
-
if (is_empty) return kll_sketch<T, C, S, A>(k);
|
581
|
+
if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
|
559
582
|
|
560
583
|
uint64_t n;
|
561
584
|
uint16_t min_k;
|
@@ -567,12 +590,12 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
567
590
|
min_k = k;
|
568
591
|
num_levels = 1;
|
569
592
|
} else {
|
570
|
-
ptr += copy_from_mem(ptr,
|
571
|
-
ptr += copy_from_mem(ptr,
|
572
|
-
ptr += copy_from_mem(ptr,
|
573
|
-
ptr
|
593
|
+
ptr += copy_from_mem(ptr, n);
|
594
|
+
ptr += copy_from_mem(ptr, min_k);
|
595
|
+
ptr += copy_from_mem(ptr, num_levels);
|
596
|
+
ptr += sizeof(uint8_t); // skip unused byte
|
574
597
|
}
|
575
|
-
vector_u32<A> levels(num_levels + 1);
|
598
|
+
vector_u32<A> levels(num_levels + 1, 0, allocator);
|
576
599
|
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
577
600
|
if (is_single_item) {
|
578
601
|
levels[0] = capacity - 1;
|
@@ -581,35 +604,36 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
581
604
|
ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
|
582
605
|
}
|
583
606
|
levels[num_levels] = capacity;
|
584
|
-
|
585
|
-
|
586
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
587
|
-
std::unique_ptr<T,
|
588
|
-
std::unique_ptr<T, item_deleter>
|
607
|
+
A alloc(allocator);
|
608
|
+
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
609
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
|
610
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
|
611
|
+
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
|
612
|
+
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
|
589
613
|
if (!is_single_item) {
|
590
614
|
ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
|
591
615
|
// serde call did not throw, repackage with destructor
|
592
|
-
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
616
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
593
617
|
ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
|
594
618
|
// serde call did not throw, repackage with destructor
|
595
|
-
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
619
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
596
620
|
}
|
597
|
-
auto items_buffer_deleter = [capacity](T* ptr) {
|
598
|
-
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(
|
621
|
+
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
622
|
+
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
599
623
|
const auto num_items = levels[num_levels] - levels[0];
|
600
624
|
ptr += S().deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
|
601
625
|
// serde call did not throw, repackage with destructors
|
602
|
-
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
|
626
|
+
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
|
603
627
|
const size_t delta = ptr - static_cast<const char*>(bytes);
|
604
628
|
if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
|
605
629
|
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
606
630
|
if (is_single_item) {
|
607
631
|
new (min_value_buffer.get()) T(items.get()[levels[0]]);
|
608
632
|
// copy did not throw, repackage with destructor
|
609
|
-
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
633
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
610
634
|
new (max_value_buffer.get()) T(items.get()[levels[0]]);
|
611
635
|
// copy did not throw, repackage with destructor
|
612
|
-
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
636
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
613
637
|
}
|
614
638
|
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
615
639
|
std::move(min_value), std::move(max_value), is_level_zero_sorted);
|
@@ -634,6 +658,7 @@ template<typename T, typename C, typename S, typename A>
|
|
634
658
|
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
|
635
659
|
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
|
636
660
|
std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
|
661
|
+
allocator_(levels.get_allocator()),
|
637
662
|
k_(k),
|
638
663
|
m_(DEFAULT_M),
|
639
664
|
min_k_(min_k),
|
@@ -735,9 +760,9 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
735
760
|
const uint32_t new_total_cap = cur_total_cap + delta_cap;
|
736
761
|
|
737
762
|
// move (and shift) the current data into the new buffer
|
738
|
-
T* new_buf =
|
763
|
+
T* new_buf = allocator_.allocate(new_total_cap);
|
739
764
|
kll_helper::move_construct<T>(items_, 0, cur_total_cap, new_buf, delta_cap, true);
|
740
|
-
|
765
|
+
allocator_.deallocate(items_, items_size_);
|
741
766
|
items_ = new_buf;
|
742
767
|
items_size_ = new_total_cap;
|
743
768
|
|
@@ -763,19 +788,20 @@ void kll_sketch<T, C, S, A>::sort_level_zero() {
|
|
763
788
|
template<typename T, typename C, typename S, typename A>
|
764
789
|
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> kll_sketch<T, C, S, A>::get_quantile_calculator() {
|
765
790
|
sort_level_zero();
|
766
|
-
|
791
|
+
using AllocCalc = typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>>;
|
792
|
+
AllocCalc alloc(allocator_);
|
767
793
|
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
|
768
|
-
new (
|
769
|
-
[](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>();
|
794
|
+
new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(*this),
|
795
|
+
[&alloc](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); alloc.deallocate(ptr, 1); }
|
770
796
|
);
|
771
797
|
return quantile_calculator;
|
772
798
|
}
|
773
799
|
|
774
800
|
template<typename T, typename C, typename S, typename A>
|
775
801
|
vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
|
776
|
-
if (is_empty()) return vector_d<A>();
|
802
|
+
if (is_empty()) return vector_d<A>(allocator_);
|
777
803
|
kll_helper::validate_values<T, C>(split_points, size);
|
778
|
-
vector_d<A> buckets(size + 1, 0);
|
804
|
+
vector_d<A> buckets(size + 1, 0, allocator_);
|
779
805
|
uint8_t level = 0;
|
780
806
|
uint64_t weight = 1;
|
781
807
|
while (level < num_levels_) {
|
@@ -845,12 +871,13 @@ template<typename T, typename C, typename S, typename A>
|
|
845
871
|
template<typename O>
|
846
872
|
void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
847
873
|
const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
|
848
|
-
|
849
|
-
|
874
|
+
A alloc(allocator_);
|
875
|
+
auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
|
876
|
+
const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
|
850
877
|
const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
|
851
878
|
const size_t work_levels_size = ub + 2; // ub+1 does not work
|
852
|
-
vector_u32<A> worklevels(work_levels_size);
|
853
|
-
vector_u32<A> outlevels(work_levels_size);
|
879
|
+
vector_u32<A> worklevels(work_levels_size, 0, allocator_);
|
880
|
+
vector_u32<A> outlevels(work_levels_size, 0, allocator_);
|
854
881
|
|
855
882
|
const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
|
856
883
|
|
@@ -864,9 +891,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
|
864
891
|
|
865
892
|
// now we need to transfer the results back into "this" sketch
|
866
893
|
if (result.final_capacity != items_size_) {
|
867
|
-
|
894
|
+
allocator_.deallocate(items_, items_size_);
|
868
895
|
items_size_ = result.final_capacity;
|
869
|
-
items_ =
|
896
|
+
items_ = allocator_.allocate(items_size_);
|
870
897
|
}
|
871
898
|
const uint32_t free_space_at_bottom = result.final_capacity - result.final_num_items;
|
872
899
|
kll_helper::move_construct<T>(workbuf.get(), outlevels[0], outlevels[0] + result.final_num_items, items_, free_space_at_bottom, true);
|
@@ -1052,14 +1079,14 @@ typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin()
|
|
1052
1079
|
|
1053
1080
|
template <typename T, typename C, typename S, typename A>
|
1054
1081
|
typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
|
1055
|
-
return kll_sketch<T, C, S, A>::const_iterator(nullptr,
|
1082
|
+
return kll_sketch<T, C, S, A>::const_iterator(nullptr, levels_.data(), num_levels_);
|
1056
1083
|
}
|
1057
1084
|
|
1058
1085
|
// kll_sketch::const_iterator implementation
|
1059
1086
|
|
1060
1087
|
template<typename T, typename C, typename S, typename A>
|
1061
1088
|
kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
|
1062
|
-
items(items), levels(levels), num_levels(num_levels), index(
|
1089
|
+
items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
|
1063
1090
|
{}
|
1064
1091
|
|
1065
1092
|
template<typename T, typename C, typename S, typename A>
|
@@ -1083,8 +1110,6 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
|
|
1083
1110
|
|
1084
1111
|
template<typename T, typename C, typename S, typename A>
|
1085
1112
|
bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
|
1086
|
-
if (level != other.level) return false;
|
1087
|
-
if (level == num_levels) return true; // end
|
1088
1113
|
return index == other.index;
|
1089
1114
|
}
|
1090
1115
|
|
@@ -1101,29 +1126,32 @@ const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator
|
|
1101
1126
|
template<typename T, typename C, typename S, typename A>
|
1102
1127
|
class kll_sketch<T, C, S, A>::item_deleter {
|
1103
1128
|
public:
|
1104
|
-
|
1129
|
+
item_deleter(const A& allocator): allocator_(allocator) {}
|
1130
|
+
void operator() (T* ptr) {
|
1105
1131
|
if (ptr != nullptr) {
|
1106
1132
|
ptr->~T();
|
1107
|
-
|
1133
|
+
allocator_.deallocate(ptr, 1);
|
1108
1134
|
}
|
1109
1135
|
}
|
1136
|
+
private:
|
1137
|
+
A allocator_;
|
1110
1138
|
};
|
1111
1139
|
|
1112
1140
|
template<typename T, typename C, typename S, typename A>
|
1113
1141
|
class kll_sketch<T, C, S, A>::items_deleter {
|
1114
1142
|
public:
|
1115
|
-
items_deleter(uint32_t start, uint32_t num
|
1116
|
-
|
1143
|
+
items_deleter(uint32_t start, uint32_t num, const A& allocator):
|
1144
|
+
allocator_(allocator), start_(start), num_(num) {}
|
1145
|
+
void operator() (T* ptr) {
|
1117
1146
|
if (ptr != nullptr) {
|
1118
|
-
for (uint32_t i =
|
1119
|
-
|
1120
|
-
}
|
1121
|
-
A().deallocate(ptr, num);
|
1147
|
+
for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
|
1148
|
+
allocator_.deallocate(ptr, num_);
|
1122
1149
|
}
|
1123
1150
|
}
|
1124
1151
|
private:
|
1125
|
-
|
1126
|
-
uint32_t
|
1152
|
+
A allocator_;
|
1153
|
+
uint32_t start_;
|
1154
|
+
uint32_t num_;
|
1127
1155
|
};
|
1128
1156
|
|
1129
1157
|
} /* namespace datasketches */
|