datasketches 0.2.3 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +7 -3
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +25 -5
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +6 -5
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +3 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +29 -11
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +5 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +108 -41
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +150 -132
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +165 -31
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +1 -1
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/README.md +13 -9
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +656 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1373 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +975 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +30 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +73 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +95 -63
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +74 -3
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +44 -33
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +34 -9
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +33 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -23,7 +23,10 @@
|
|
23
23
|
#include <iostream>
|
24
24
|
#include <iomanip>
|
25
25
|
#include <sstream>
|
26
|
+
#include <stdexcept>
|
26
27
|
|
28
|
+
#include "conditional_forward.hpp"
|
29
|
+
#include "count_zeros.hpp"
|
27
30
|
#include "memory_operations.hpp"
|
28
31
|
#include "kll_helper.hpp"
|
29
32
|
|
@@ -67,7 +70,7 @@ max_value_(nullptr),
|
|
67
70
|
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
68
71
|
{
|
69
72
|
items_ = allocator_.allocate(items_size_);
|
70
|
-
|
73
|
+
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
71
74
|
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
72
75
|
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
73
76
|
}
|
@@ -146,19 +149,39 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
|
|
146
149
|
}
|
147
150
|
|
148
151
|
template<typename T, typename C, typename S, typename A>
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
152
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
153
|
+
kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
|
154
|
+
allocator_(allocator),
|
155
|
+
k_(other.k_),
|
156
|
+
m_(other.m_),
|
157
|
+
min_k_(other.min_k_),
|
158
|
+
n_(other.n_),
|
159
|
+
num_levels_(other.num_levels_),
|
160
|
+
levels_(other.levels_, allocator_),
|
161
|
+
items_(nullptr),
|
162
|
+
items_size_(other.items_size_),
|
163
|
+
min_value_(nullptr),
|
164
|
+
max_value_(nullptr),
|
165
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
166
|
+
{
|
167
|
+
static_assert(
|
168
|
+
std::is_constructible<T, TT>::value,
|
169
|
+
"Type converting constructor requires new type to be constructible from existing type"
|
170
|
+
);
|
171
|
+
items_ = allocator_.allocate(items_size_);
|
172
|
+
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
173
|
+
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
174
|
+
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
175
|
+
check_sorting();
|
154
176
|
}
|
155
177
|
|
156
178
|
template<typename T, typename C, typename S, typename A>
|
157
|
-
|
179
|
+
template<typename FwdT>
|
180
|
+
void kll_sketch<T, C, S, A>::update(FwdT&& value) {
|
158
181
|
if (!check_update_value(value)) { return; }
|
159
182
|
update_min_max(value);
|
160
183
|
const uint32_t index = internal_update();
|
161
|
-
new (&items_[index]) T(std::
|
184
|
+
new (&items_[index]) T(std::forward<FwdT>(value));
|
162
185
|
}
|
163
186
|
|
164
187
|
template<typename T, typename C, typename S, typename A>
|
@@ -181,22 +204,23 @@ uint32_t kll_sketch<T, C, S, A>::internal_update() {
|
|
181
204
|
}
|
182
205
|
|
183
206
|
template<typename T, typename C, typename S, typename A>
|
184
|
-
|
207
|
+
template<typename FwdSk>
|
208
|
+
void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
|
185
209
|
if (other.is_empty()) return;
|
186
210
|
if (m_ != other.m_) {
|
187
211
|
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
188
212
|
}
|
189
213
|
if (is_empty()) {
|
190
|
-
min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
191
|
-
max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
214
|
+
min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
|
215
|
+
max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
|
192
216
|
} else {
|
193
|
-
if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
|
194
|
-
if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
|
217
|
+
if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
|
218
|
+
if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
|
195
219
|
}
|
196
220
|
const uint64_t final_n = n_ + other.n_;
|
197
221
|
for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
|
198
222
|
const uint32_t index = internal_update();
|
199
|
-
new (&items_[index]) T(other.items_[i]);
|
223
|
+
new (&items_[index]) T(conditional_forward<FwdSk>(other.items_[i]));
|
200
224
|
}
|
201
225
|
if (other.num_levels_ >= 2) merge_higher_levels(other, final_n);
|
202
226
|
n_ = final_n;
|
@@ -204,30 +228,6 @@ void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
|
|
204
228
|
assert_correct_total_weight();
|
205
229
|
}
|
206
230
|
|
207
|
-
template<typename T, typename C, typename S, typename A>
|
208
|
-
void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
|
209
|
-
if (other.is_empty()) return;
|
210
|
-
if (m_ != other.m_) {
|
211
|
-
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
212
|
-
}
|
213
|
-
if (is_empty()) {
|
214
|
-
min_value_ = new (allocator_.allocate(1)) T(std::move(*other.min_value_));
|
215
|
-
max_value_ = new (allocator_.allocate(1)) T(std::move(*other.max_value_));
|
216
|
-
} else {
|
217
|
-
if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
|
218
|
-
if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
|
219
|
-
}
|
220
|
-
const uint64_t final_n = n_ + other.n_;
|
221
|
-
for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
|
222
|
-
const uint32_t index = internal_update();
|
223
|
-
new (&items_[index]) T(std::move(other.items_[i]));
|
224
|
-
}
|
225
|
-
if (other.num_levels_ >= 2) merge_higher_levels(std::forward<kll_sketch>(other), final_n);
|
226
|
-
n_ = final_n;
|
227
|
-
if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
|
228
|
-
assert_correct_total_weight();
|
229
|
-
}
|
230
|
-
|
231
231
|
template<typename T, typename C, typename S, typename A>
|
232
232
|
bool kll_sketch<T, C, S, A>::is_empty() const {
|
233
233
|
return n_ == 0;
|
@@ -266,43 +266,49 @@ T kll_sketch<T, C, S, A>::get_max_value() const {
|
|
266
266
|
}
|
267
267
|
|
268
268
|
template<typename T, typename C, typename S, typename A>
|
269
|
-
|
269
|
+
C kll_sketch<T, C, S, A>::get_comparator() const {
|
270
|
+
return C();
|
271
|
+
}
|
272
|
+
|
273
|
+
template<typename T, typename C, typename S, typename A>
|
274
|
+
template<bool inclusive>
|
275
|
+
auto kll_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
|
270
276
|
if (is_empty()) return get_invalid_value();
|
271
|
-
if (
|
272
|
-
if (
|
273
|
-
if ((
|
277
|
+
if (rank == 0.0) return *min_value_;
|
278
|
+
if (rank == 1.0) return *max_value_;
|
279
|
+
if ((rank < 0.0) || (rank > 1.0)) {
|
274
280
|
throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
|
275
281
|
}
|
276
|
-
//
|
277
|
-
|
278
|
-
return quantile_calculator->get_quantile(fraction);
|
282
|
+
// may have a side effect of sorting level zero if needed
|
283
|
+
return get_sorted_view<inclusive>(true).get_quantile(rank);
|
279
284
|
}
|
280
285
|
|
281
286
|
template<typename T, typename C, typename S, typename A>
|
282
|
-
|
287
|
+
template<bool inclusive>
|
288
|
+
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
|
283
289
|
std::vector<T, A> quantiles(allocator_);
|
284
290
|
if (is_empty()) return quantiles;
|
285
|
-
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
|
286
291
|
quantiles.reserve(size);
|
292
|
+
|
293
|
+
// may have a side effect of sorting level zero if needed
|
294
|
+
auto view = get_sorted_view<inclusive>(true);
|
295
|
+
|
287
296
|
for (uint32_t i = 0; i < size; i++) {
|
288
|
-
const double
|
289
|
-
if ((
|
297
|
+
const double rank = ranks[i];
|
298
|
+
if ((rank < 0.0) || (rank > 1.0)) {
|
290
299
|
throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
|
291
300
|
}
|
292
|
-
if
|
293
|
-
else if (
|
301
|
+
else if (rank == 0.0) quantiles.push_back(*min_value_);
|
302
|
+
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
294
303
|
else {
|
295
|
-
|
296
|
-
// has side effect of sorting level zero if needed
|
297
|
-
quantile_calculator = const_cast<kll_sketch*>(this)->get_quantile_calculator();
|
298
|
-
}
|
299
|
-
quantiles.push_back(quantile_calculator->get_quantile(fraction));
|
304
|
+
quantiles.push_back(view.get_quantile(rank));
|
300
305
|
}
|
301
306
|
}
|
302
307
|
return quantiles;
|
303
308
|
}
|
304
309
|
|
305
310
|
template<typename T, typename C, typename S, typename A>
|
311
|
+
template<bool inclusive>
|
306
312
|
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
|
307
313
|
if (is_empty()) return std::vector<T, A>(allocator_);
|
308
314
|
if (num == 0) {
|
@@ -316,20 +322,21 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
|
|
316
322
|
if (num > 1) {
|
317
323
|
fractions[num - 1] = 1.0;
|
318
324
|
}
|
319
|
-
return get_quantiles(fractions.data(), num);
|
325
|
+
return get_quantiles<inclusive>(fractions.data(), num);
|
320
326
|
}
|
321
327
|
|
322
328
|
template<typename T, typename C, typename S, typename A>
|
329
|
+
template<bool inclusive>
|
323
330
|
double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
|
324
331
|
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
|
325
332
|
uint8_t level = 0;
|
326
333
|
uint64_t weight = 1;
|
327
334
|
uint64_t total = 0;
|
328
335
|
while (level < num_levels_) {
|
329
|
-
const auto from_index
|
330
|
-
const auto to_index
|
336
|
+
const auto from_index = levels_[level];
|
337
|
+
const auto to_index = levels_[level + 1]; // exclusive
|
331
338
|
for (uint32_t i = from_index; i < to_index; i++) {
|
332
|
-
if (C()(items_[i], value)) {
|
339
|
+
if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
|
333
340
|
total += weight;
|
334
341
|
} else if ((level > 0) || is_level_zero_sorted_) {
|
335
342
|
break; // levels above 0 are sorted, no point comparing further
|
@@ -342,13 +349,15 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
|
|
342
349
|
}
|
343
350
|
|
344
351
|
template<typename T, typename C, typename S, typename A>
|
352
|
+
template<bool inclusive>
|
345
353
|
vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
|
346
|
-
return get_PMF_or_CDF(split_points, size, false);
|
354
|
+
return get_PMF_or_CDF<inclusive>(split_points, size, false);
|
347
355
|
}
|
348
356
|
|
349
357
|
template<typename T, typename C, typename S, typename A>
|
358
|
+
template<bool inclusive>
|
350
359
|
vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
|
351
|
-
return get_PMF_or_CDF(split_points, size, true);
|
360
|
+
return get_PMF_or_CDF<inclusive>(split_points, size, true);
|
352
361
|
}
|
353
362
|
|
354
363
|
template<typename T, typename C, typename S, typename A>
|
@@ -358,8 +367,8 @@ double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
|
|
358
367
|
|
359
368
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
360
369
|
template<typename T, typename C, typename S, typename A>
|
361
|
-
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
362
|
-
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
370
|
+
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
371
|
+
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
|
363
372
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
364
373
|
if (num_levels_ == 1 && get_num_retained() == 1) {
|
365
374
|
return DATA_START_SINGLE_ITEM + sizeof(TT);
|
@@ -370,17 +379,17 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
|
370
379
|
|
371
380
|
// implementation for all other types
|
372
381
|
template<typename T, typename C, typename S, typename A>
|
373
|
-
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
374
|
-
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
382
|
+
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
383
|
+
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
375
384
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
376
385
|
if (num_levels_ == 1 && get_num_retained() == 1) {
|
377
|
-
return DATA_START_SINGLE_ITEM +
|
386
|
+
return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
|
378
387
|
}
|
379
388
|
// the last integer in the levels_ array is not serialized because it can be derived
|
380
389
|
size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
|
381
|
-
size +=
|
382
|
-
size +=
|
383
|
-
for (auto it: *this) size +=
|
390
|
+
size += sd.size_of_item(*min_value_);
|
391
|
+
size += sd.size_of_item(*max_value_);
|
392
|
+
for (auto it: *this) size += sd.size_of_item(it.first);
|
384
393
|
return size;
|
385
394
|
}
|
386
395
|
|
@@ -405,7 +414,8 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
|
|
405
414
|
}
|
406
415
|
|
407
416
|
template<typename T, typename C, typename S, typename A>
|
408
|
-
|
417
|
+
template<typename SerDe>
|
418
|
+
void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
409
419
|
const bool is_single_item = n_ == 1;
|
410
420
|
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
411
421
|
write(os, preamble_ints);
|
@@ -430,16 +440,17 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
|
|
430
440
|
write(os, num_levels_);
|
431
441
|
write(os, unused);
|
432
442
|
write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
|
433
|
-
|
434
|
-
|
443
|
+
sd.serialize(os, min_value_, 1);
|
444
|
+
sd.serialize(os, max_value_, 1);
|
435
445
|
}
|
436
|
-
|
446
|
+
sd.serialize(os, &items_[levels_[0]], get_num_retained());
|
437
447
|
}
|
438
448
|
|
439
449
|
template<typename T, typename C, typename S, typename A>
|
440
|
-
|
450
|
+
template<typename SerDe>
|
451
|
+
vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
441
452
|
const bool is_single_item = n_ == 1;
|
442
|
-
const size_t size = header_size_bytes + get_serialized_size_bytes();
|
453
|
+
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
443
454
|
vector_u8<A> bytes(size, 0, allocator_);
|
444
455
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
445
456
|
const uint8_t* end_ptr = ptr + size;
|
@@ -465,11 +476,11 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
|
|
465
476
|
ptr += copy_to_mem(num_levels_, ptr);
|
466
477
|
ptr += sizeof(uint8_t); // unused
|
467
478
|
ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
|
468
|
-
ptr +=
|
469
|
-
ptr +=
|
479
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
|
480
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
|
470
481
|
}
|
471
482
|
const size_t bytes_remaining = end_ptr - ptr;
|
472
|
-
ptr +=
|
483
|
+
ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
|
473
484
|
}
|
474
485
|
const size_t delta = ptr - bytes.data();
|
475
486
|
if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
|
@@ -478,6 +489,12 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
|
|
478
489
|
|
479
490
|
template<typename T, typename C, typename S, typename A>
|
480
491
|
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
|
492
|
+
return deserialize(is, S(), allocator);
|
493
|
+
}
|
494
|
+
|
495
|
+
template<typename T, typename C, typename S, typename A>
|
496
|
+
template<typename SerDe>
|
497
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
481
498
|
const auto preamble_ints = read<uint8_t>(is);
|
482
499
|
const auto serial_version = read<uint8_t>(is);
|
483
500
|
const auto family_id = read<uint8_t>(is);
|
@@ -525,17 +542,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
525
542
|
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
|
526
543
|
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
|
527
544
|
if (!is_single_item) {
|
528
|
-
|
545
|
+
sd.deserialize(is, min_value_buffer.get(), 1);
|
529
546
|
// serde call did not throw, repackage with destructor
|
530
547
|
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
531
|
-
|
548
|
+
sd.deserialize(is, max_value_buffer.get(), 1);
|
532
549
|
// serde call did not throw, repackage with destructor
|
533
550
|
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
534
551
|
}
|
535
552
|
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
536
553
|
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
537
554
|
const auto num_items = levels[num_levels] - levels[0];
|
538
|
-
|
555
|
+
sd.deserialize(is, &items_buffer.get()[levels[0]], num_items);
|
539
556
|
// serde call did not throw, repackage with destructors
|
540
557
|
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
|
541
558
|
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
@@ -555,6 +572,12 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
555
572
|
|
556
573
|
template<typename T, typename C, typename S, typename A>
|
557
574
|
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
575
|
+
return deserialize(bytes, size, S(), allocator);
|
576
|
+
}
|
577
|
+
|
578
|
+
template<typename T, typename C, typename S, typename A>
|
579
|
+
template<typename SerDe>
|
580
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
558
581
|
ensure_minimum_memory(size, 8);
|
559
582
|
const char* ptr = static_cast<const char*>(bytes);
|
560
583
|
uint8_t preamble_ints;
|
@@ -611,17 +634,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
611
634
|
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
|
612
635
|
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
|
613
636
|
if (!is_single_item) {
|
614
|
-
ptr +=
|
637
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
|
615
638
|
// serde call did not throw, repackage with destructor
|
616
639
|
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
|
617
|
-
ptr +=
|
640
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
|
618
641
|
// serde call did not throw, repackage with destructor
|
619
642
|
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
|
620
643
|
}
|
621
644
|
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
622
645
|
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
623
646
|
const auto num_items = levels[num_levels] - levels[0];
|
624
|
-
ptr +=
|
647
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
|
625
648
|
// serde call did not throw, repackage with destructors
|
626
649
|
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
|
627
650
|
const size_t delta = ptr - static_cast<const char*>(bytes);
|
@@ -699,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
699
722
|
// level zero might not be sorted, so we must sort it if we wish to compact it
|
700
723
|
// sort_level_zero() is not used here because of the adjustment for odd number of items
|
701
724
|
if ((level == 0) && !is_level_zero_sorted_) {
|
702
|
-
std::sort(
|
725
|
+
std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
|
703
726
|
}
|
704
727
|
if (pop_above == 0) {
|
705
728
|
kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
|
@@ -722,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
722
745
|
// so that the freed-up space can be used by level zero
|
723
746
|
if (level > 0) {
|
724
747
|
const uint32_t amount = raw_beg - levels_[0];
|
725
|
-
std::move_backward(
|
748
|
+
std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
|
726
749
|
for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
|
727
750
|
}
|
728
751
|
for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
|
@@ -780,24 +803,39 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
780
803
|
template<typename T, typename C, typename S, typename A>
|
781
804
|
void kll_sketch<T, C, S, A>::sort_level_zero() {
|
782
805
|
if (!is_level_zero_sorted_) {
|
783
|
-
std::sort(
|
806
|
+
std::sort(items_ + levels_[0], items_ + levels_[1], C());
|
784
807
|
is_level_zero_sorted_ = true;
|
785
808
|
}
|
786
809
|
}
|
787
810
|
|
788
811
|
template<typename T, typename C, typename S, typename A>
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
812
|
+
void kll_sketch<T, C, S, A>::check_sorting() const {
|
813
|
+
// not checking level 0
|
814
|
+
for (uint8_t level = 1; level < num_levels_; ++level) {
|
815
|
+
const auto from = items_ + levels_[level];
|
816
|
+
const auto to = items_ + levels_[level + 1];
|
817
|
+
if (!std::is_sorted(from, to, C())) {
|
818
|
+
throw std::logic_error("levels must be sorted");
|
819
|
+
}
|
820
|
+
}
|
821
|
+
}
|
822
|
+
|
823
|
+
template<typename T, typename C, typename S, typename A>
|
824
|
+
template<bool inclusive>
|
825
|
+
quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
|
826
|
+
const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
|
827
|
+
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
|
828
|
+
for (uint8_t level = 0; level < num_levels_; ++level) {
|
829
|
+
const auto from = items_ + levels_[level];
|
830
|
+
const auto to = items_ + levels_[level + 1]; // exclusive
|
831
|
+
view.add(from, to, 1 << level);
|
832
|
+
}
|
833
|
+
if (cumulative) view.template convert_to_cummulative<inclusive>();
|
834
|
+
return view;
|
798
835
|
}
|
799
836
|
|
800
837
|
template<typename T, typename C, typename S, typename A>
|
838
|
+
template<bool inclusive>
|
801
839
|
vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
|
802
840
|
if (is_empty()) return vector_d<A>(allocator_);
|
803
841
|
kll_helper::validate_values<T, C>(split_points, size);
|
@@ -808,9 +846,9 @@ vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32
|
|
808
846
|
const auto from_index = levels_[level];
|
809
847
|
const auto to_index = levels_[level + 1]; // exclusive
|
810
848
|
if ((level == 0) && !is_level_zero_sorted_) {
|
811
|
-
increment_buckets_unsorted_level(from_index, to_index, weight, split_points, size, buckets.data());
|
849
|
+
increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
|
812
850
|
} else {
|
813
|
-
increment_buckets_sorted_level(from_index, to_index, weight, split_points, size, buckets.data());
|
851
|
+
increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
|
814
852
|
}
|
815
853
|
level++;
|
816
854
|
weight *= 2;
|
@@ -831,13 +869,14 @@ vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32
|
|
831
869
|
}
|
832
870
|
|
833
871
|
template<typename T, typename C, typename S, typename A>
|
872
|
+
template<bool inclusive>
|
834
873
|
void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
835
874
|
const T* split_points, uint32_t size, double* buckets) const
|
836
875
|
{
|
837
876
|
for (uint32_t i = from_index; i < to_index; i++) {
|
838
877
|
uint32_t j;
|
839
878
|
for (j = 0; j < size; j++) {
|
840
|
-
if (C()(items_[i], split_points[j])) {
|
879
|
+
if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
|
841
880
|
break;
|
842
881
|
}
|
843
882
|
}
|
@@ -846,13 +885,14 @@ void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_inde
|
|
846
885
|
}
|
847
886
|
|
848
887
|
template<typename T, typename C, typename S, typename A>
|
888
|
+
template<bool inclusive>
|
849
889
|
void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
850
890
|
const T* split_points, uint32_t size, double* buckets) const
|
851
891
|
{
|
852
892
|
uint32_t i = from_index;
|
853
893
|
uint32_t j = 0;
|
854
894
|
while ((i < to_index) && (j < size)) {
|
855
|
-
if (C()(items_[i], split_points[j])) {
|
895
|
+
if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
|
856
896
|
buckets[j] += weight; // this sample goes into this bucket
|
857
897
|
i++; // move on to next sample and see whether it also goes into this bucket
|
858
898
|
} else {
|
@@ -910,9 +950,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
|
910
950
|
}
|
911
951
|
|
912
952
|
// this leaves items_ uninitialized (all objects moved out and destroyed)
|
913
|
-
// this version copies objects from the incoming sketch
|
914
953
|
template<typename T, typename C, typename S, typename A>
|
915
|
-
|
954
|
+
template<typename FwdSk>
|
955
|
+
void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
|
916
956
|
worklevels[0] = 0;
|
917
957
|
|
918
958
|
// the level zero data from "other" was already inserted into "this"
|
@@ -927,32 +967,9 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(const kll_sketch& other, T* wo
|
|
927
967
|
if ((self_pop > 0) && (other_pop == 0)) {
|
928
968
|
kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
|
929
969
|
} else if ((self_pop == 0) && (other_pop > 0)) {
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
}
|
934
|
-
}
|
935
|
-
}
|
936
|
-
|
937
|
-
// this leaves items_ uninitialized (all objects moved out and destroyed)
|
938
|
-
// this version moves objects from the incoming sketch
|
939
|
-
template<typename T, typename C, typename S, typename A>
|
940
|
-
void kll_sketch<T, C, S, A>::populate_work_arrays(kll_sketch&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
|
941
|
-
worklevels[0] = 0;
|
942
|
-
|
943
|
-
// the level zero data from "other" was already inserted into "this"
|
944
|
-
kll_helper::move_construct<T>(items_, levels_[0], levels_[1], workbuf, 0, true);
|
945
|
-
worklevels[1] = safe_level_size(0);
|
946
|
-
|
947
|
-
for (uint8_t lvl = 1; lvl < provisional_num_levels; lvl++) {
|
948
|
-
const uint32_t self_pop = safe_level_size(lvl);
|
949
|
-
const uint32_t other_pop = other.safe_level_size(lvl);
|
950
|
-
worklevels[lvl + 1] = worklevels[lvl] + self_pop + other_pop;
|
951
|
-
|
952
|
-
if ((self_pop > 0) && (other_pop == 0)) {
|
953
|
-
kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
|
954
|
-
} else if ((self_pop == 0) && (other_pop > 0)) {
|
955
|
-
kll_helper::move_construct<T>(other.items_, other.levels_[lvl], other.levels_[lvl] + other_pop, workbuf, worklevels[lvl], false);
|
970
|
+
for (auto i = other.levels_[lvl], j = worklevels[lvl]; i < other.levels_[lvl] + other_pop; ++i, ++j) {
|
971
|
+
new (&workbuf[j]) T(conditional_forward<FwdSk>(other.items_[i]));
|
972
|
+
}
|
956
973
|
} else if ((self_pop > 0) && (other_pop > 0)) {
|
957
974
|
kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
|
958
975
|
}
|
@@ -1023,7 +1040,9 @@ void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
|
|
1023
1040
|
|
1024
1041
|
template <typename T, typename C, typename S, typename A>
|
1025
1042
|
string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
|
1026
|
-
|
1043
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
1044
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
1045
|
+
std::ostringstream os;
|
1027
1046
|
os << "### KLL sketch summary:" << std::endl;
|
1028
1047
|
os << " K : " << k_ << std::endl;
|
1029
1048
|
os << " min K : " << min_k_ << std::endl;
|
@@ -1037,7 +1056,6 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
1037
1056
|
os << " Sorted : " << (is_level_zero_sorted_ ? "true" : "false") << std::endl;
|
1038
1057
|
os << " Capacity items : " << items_size_ << std::endl;
|
1039
1058
|
os << " Retained items : " << get_num_retained() << std::endl;
|
1040
|
-
os << " Storage bytes : " << get_serialized_size_bytes() << std::endl;
|
1041
1059
|
if (!is_empty()) {
|
1042
1060
|
os << " Min value : " << *min_value_ << std::endl;
|
1043
1061
|
os << " Max value : " << *max_value_ << std::endl;
|
@@ -1069,7 +1087,7 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
1069
1087
|
}
|
1070
1088
|
os << "### End sketch data" << std::endl;
|
1071
1089
|
}
|
1072
|
-
return os.str();
|
1090
|
+
return string<A>(os.str().c_str(), allocator_);
|
1073
1091
|
}
|
1074
1092
|
|
1075
1093
|
template <typename T, typename C, typename S, typename A>
|