datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,1131 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef KLL_SKETCH_IMPL_HPP_
|
|
21
|
+
#define KLL_SKETCH_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include <iostream>
|
|
24
|
+
#include <iomanip>
|
|
25
|
+
#include <sstream>
|
|
26
|
+
|
|
27
|
+
#include "memory_operations.hpp"
|
|
28
|
+
#include "kll_helper.hpp"
|
|
29
|
+
|
|
30
|
+
namespace datasketches {
|
|
31
|
+
|
|
32
|
+
template<typename T, typename C, typename S, typename A>
|
|
33
|
+
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k):
|
|
34
|
+
k_(k),
|
|
35
|
+
m_(DEFAULT_M),
|
|
36
|
+
min_k_(k),
|
|
37
|
+
n_(0),
|
|
38
|
+
num_levels_(1),
|
|
39
|
+
levels_(2),
|
|
40
|
+
items_(nullptr),
|
|
41
|
+
items_size_(k_),
|
|
42
|
+
min_value_(nullptr),
|
|
43
|
+
max_value_(nullptr),
|
|
44
|
+
is_level_zero_sorted_(false)
|
|
45
|
+
{
|
|
46
|
+
if (k < MIN_K || k > MAX_K) {
|
|
47
|
+
throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
|
|
48
|
+
}
|
|
49
|
+
levels_[0] = levels_[1] = k;
|
|
50
|
+
items_ = A().allocate(items_size_);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
template<typename T, typename C, typename S, typename A>
|
|
54
|
+
kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
|
|
55
|
+
k_(other.k_),
|
|
56
|
+
m_(other.m_),
|
|
57
|
+
min_k_(other.min_k_),
|
|
58
|
+
n_(other.n_),
|
|
59
|
+
num_levels_(other.num_levels_),
|
|
60
|
+
levels_(other.levels_),
|
|
61
|
+
items_(nullptr),
|
|
62
|
+
items_size_(other.items_size_),
|
|
63
|
+
min_value_(nullptr),
|
|
64
|
+
max_value_(nullptr),
|
|
65
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
|
66
|
+
{
|
|
67
|
+
items_ = A().allocate(items_size_);
|
|
68
|
+
std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
|
|
69
|
+
if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
|
|
70
|
+
if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
template<typename T, typename C, typename S, typename A>
|
|
74
|
+
kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
|
|
75
|
+
k_(other.k_),
|
|
76
|
+
m_(other.m_),
|
|
77
|
+
min_k_(other.min_k_),
|
|
78
|
+
n_(other.n_),
|
|
79
|
+
num_levels_(other.num_levels_),
|
|
80
|
+
levels_(std::move(other.levels_)),
|
|
81
|
+
items_(other.items_),
|
|
82
|
+
items_size_(other.items_size_),
|
|
83
|
+
min_value_(other.min_value_),
|
|
84
|
+
max_value_(other.max_value_),
|
|
85
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
|
86
|
+
{
|
|
87
|
+
other.items_ = nullptr;
|
|
88
|
+
other.min_value_ = nullptr;
|
|
89
|
+
other.max_value_ = nullptr;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
template<typename T, typename C, typename S, typename A>
|
|
93
|
+
kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
|
|
94
|
+
kll_sketch copy(other);
|
|
95
|
+
std::swap(k_, copy.k_);
|
|
96
|
+
std::swap(m_, copy.m_);
|
|
97
|
+
std::swap(min_k_, copy.min_k_);
|
|
98
|
+
std::swap(n_, copy.n_);
|
|
99
|
+
std::swap(num_levels_, copy.num_levels_);
|
|
100
|
+
std::swap(levels_, copy.levels_);
|
|
101
|
+
std::swap(items_, copy.items_);
|
|
102
|
+
std::swap(items_size_, copy.items_size_);
|
|
103
|
+
std::swap(min_value_, copy.min_value_);
|
|
104
|
+
std::swap(max_value_, copy.max_value_);
|
|
105
|
+
std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
|
|
106
|
+
return *this;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
template<typename T, typename C, typename S, typename A>
|
|
110
|
+
kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
|
|
111
|
+
std::swap(k_, other.k_);
|
|
112
|
+
std::swap(m_, other.m_);
|
|
113
|
+
std::swap(min_k_, other.min_k_);
|
|
114
|
+
std::swap(n_, other.n_);
|
|
115
|
+
std::swap(num_levels_, other.num_levels_);
|
|
116
|
+
std::swap(levels_, other.levels_);
|
|
117
|
+
std::swap(items_, other.items_);
|
|
118
|
+
std::swap(items_size_, other.items_size_);
|
|
119
|
+
std::swap(min_value_, other.min_value_);
|
|
120
|
+
std::swap(max_value_, other.max_value_);
|
|
121
|
+
std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
|
|
122
|
+
return *this;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
template<typename T, typename C, typename S, typename A>
|
|
126
|
+
kll_sketch<T, C, S, A>::~kll_sketch() {
|
|
127
|
+
if (items_ != nullptr) {
|
|
128
|
+
const uint32_t begin = levels_[0];
|
|
129
|
+
const uint32_t end = levels_[num_levels_];
|
|
130
|
+
for (uint32_t i = begin; i < end; i++) items_[i].~T();
|
|
131
|
+
A().deallocate(items_, items_size_);
|
|
132
|
+
}
|
|
133
|
+
if (min_value_ != nullptr) {
|
|
134
|
+
min_value_->~T();
|
|
135
|
+
A().deallocate(min_value_, 1);
|
|
136
|
+
}
|
|
137
|
+
if (max_value_ != nullptr) {
|
|
138
|
+
max_value_->~T();
|
|
139
|
+
A().deallocate(max_value_, 1);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
template<typename T, typename C, typename S, typename A>
|
|
144
|
+
void kll_sketch<T, C, S, A>::update(const T& value) {
|
|
145
|
+
if (!check_update_value(value)) { return; }
|
|
146
|
+
update_min_max(value);
|
|
147
|
+
const uint32_t index = internal_update();
|
|
148
|
+
new (&items_[index]) T(value);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template<typename T, typename C, typename S, typename A>
|
|
152
|
+
void kll_sketch<T, C, S, A>::update(T&& value) {
|
|
153
|
+
if (!check_update_value(value)) { return; }
|
|
154
|
+
update_min_max(value);
|
|
155
|
+
const uint32_t index = internal_update();
|
|
156
|
+
new (&items_[index]) T(std::move(value));
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
template<typename T, typename C, typename S, typename A>
|
|
160
|
+
void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
|
|
161
|
+
if (is_empty()) {
|
|
162
|
+
min_value_ = new (A().allocate(1)) T(value);
|
|
163
|
+
max_value_ = new (A().allocate(1)) T(value);
|
|
164
|
+
} else {
|
|
165
|
+
if (C()(value, *min_value_)) *min_value_ = value;
|
|
166
|
+
if (C()(*max_value_, value)) *max_value_ = value;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template<typename T, typename C, typename S, typename A>
|
|
171
|
+
uint32_t kll_sketch<T, C, S, A>::internal_update() {
|
|
172
|
+
if (levels_[0] == 0) compress_while_updating();
|
|
173
|
+
n_++;
|
|
174
|
+
is_level_zero_sorted_ = false;
|
|
175
|
+
return --levels_[0];
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
template<typename T, typename C, typename S, typename A>
|
|
179
|
+
void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
|
|
180
|
+
if (other.is_empty()) return;
|
|
181
|
+
if (m_ != other.m_) {
|
|
182
|
+
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
|
183
|
+
}
|
|
184
|
+
if (is_empty()) {
|
|
185
|
+
min_value_ = new (A().allocate(1)) T(*other.min_value_);
|
|
186
|
+
max_value_ = new (A().allocate(1)) T(*other.max_value_);
|
|
187
|
+
} else {
|
|
188
|
+
if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
|
|
189
|
+
if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
|
|
190
|
+
}
|
|
191
|
+
const uint64_t final_n = n_ + other.n_;
|
|
192
|
+
for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
|
|
193
|
+
const uint32_t index = internal_update();
|
|
194
|
+
new (&items_[index]) T(other.items_[i]);
|
|
195
|
+
}
|
|
196
|
+
if (other.num_levels_ >= 2) merge_higher_levels(other, final_n);
|
|
197
|
+
n_ = final_n;
|
|
198
|
+
if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
|
|
199
|
+
assert_correct_total_weight();
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
template<typename T, typename C, typename S, typename A>
|
|
203
|
+
void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
|
|
204
|
+
if (other.is_empty()) return;
|
|
205
|
+
if (m_ != other.m_) {
|
|
206
|
+
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
|
207
|
+
}
|
|
208
|
+
if (is_empty()) {
|
|
209
|
+
min_value_ = new (A().allocate(1)) T(std::move(*other.min_value_));
|
|
210
|
+
max_value_ = new (A().allocate(1)) T(std::move(*other.max_value_));
|
|
211
|
+
} else {
|
|
212
|
+
if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
|
|
213
|
+
if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
|
|
214
|
+
}
|
|
215
|
+
const uint64_t final_n = n_ + other.n_;
|
|
216
|
+
for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
|
|
217
|
+
const uint32_t index = internal_update();
|
|
218
|
+
new (&items_[index]) T(std::move(other.items_[i]));
|
|
219
|
+
}
|
|
220
|
+
if (other.num_levels_ >= 2) merge_higher_levels(std::forward<kll_sketch>(other), final_n);
|
|
221
|
+
n_ = final_n;
|
|
222
|
+
if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
|
|
223
|
+
assert_correct_total_weight();
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
template<typename T, typename C, typename S, typename A>
|
|
227
|
+
bool kll_sketch<T, C, S, A>::is_empty() const {
|
|
228
|
+
return n_ == 0;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
template<typename T, typename C, typename S, typename A>
|
|
232
|
+
uint64_t kll_sketch<T, C, S, A>::get_n() const {
|
|
233
|
+
return n_;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
template<typename T, typename C, typename S, typename A>
|
|
237
|
+
uint32_t kll_sketch<T, C, S, A>::get_num_retained() const {
|
|
238
|
+
return levels_[num_levels_] - levels_[0];
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
template<typename T, typename C, typename S, typename A>
|
|
242
|
+
bool kll_sketch<T, C, S, A>::is_estimation_mode() const {
|
|
243
|
+
return num_levels_ > 1;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
template<typename T, typename C, typename S, typename A>
|
|
247
|
+
T kll_sketch<T, C, S, A>::get_min_value() const {
|
|
248
|
+
if (is_empty()) return get_invalid_value();
|
|
249
|
+
return *min_value_;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
template<typename T, typename C, typename S, typename A>
|
|
253
|
+
T kll_sketch<T, C, S, A>::get_max_value() const {
|
|
254
|
+
if (is_empty()) return get_invalid_value();
|
|
255
|
+
return *max_value_;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
template<typename T, typename C, typename S, typename A>
|
|
259
|
+
T kll_sketch<T, C, S, A>::get_quantile(double fraction) const {
|
|
260
|
+
if (is_empty()) return get_invalid_value();
|
|
261
|
+
if (fraction == 0.0) return *min_value_;
|
|
262
|
+
if (fraction == 1.0) return *max_value_;
|
|
263
|
+
if ((fraction < 0.0) || (fraction > 1.0)) {
|
|
264
|
+
throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
|
|
265
|
+
}
|
|
266
|
+
// has side effect of sorting level zero if needed
|
|
267
|
+
auto quantile_calculator(const_cast<kll_sketch*>(this)->get_quantile_calculator());
|
|
268
|
+
return quantile_calculator->get_quantile(fraction);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
template<typename T, typename C, typename S, typename A>
|
|
272
|
+
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
|
|
273
|
+
std::vector<T, A> quantiles;
|
|
274
|
+
quantiles.reserve(size);
|
|
275
|
+
if (is_empty()) return quantiles;
|
|
276
|
+
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
|
|
277
|
+
quantiles.reserve(size);
|
|
278
|
+
for (uint32_t i = 0; i < size; i++) {
|
|
279
|
+
const double fraction = fractions[i];
|
|
280
|
+
if ((fraction < 0.0) || (fraction > 1.0)) {
|
|
281
|
+
throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
|
|
282
|
+
}
|
|
283
|
+
if (fraction == 0.0) quantiles.push_back(*min_value_);
|
|
284
|
+
else if (fraction == 1.0) quantiles.push_back(*max_value_);
|
|
285
|
+
else {
|
|
286
|
+
if (!quantile_calculator) {
|
|
287
|
+
// has side effect of sorting level zero if needed
|
|
288
|
+
quantile_calculator = const_cast<kll_sketch*>(this)->get_quantile_calculator();
|
|
289
|
+
}
|
|
290
|
+
quantiles.push_back(quantile_calculator->get_quantile(fraction));
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
return quantiles;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
template<typename T, typename C, typename S, typename A>
|
|
297
|
+
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(size_t num) const {
|
|
298
|
+
if (is_empty()) return std::vector<T, A>();
|
|
299
|
+
if (num == 0) {
|
|
300
|
+
throw std::invalid_argument("num must be > 0");
|
|
301
|
+
}
|
|
302
|
+
std::vector<double> fractions(num);
|
|
303
|
+
fractions[0] = 0.0;
|
|
304
|
+
for (size_t i = 1; i < num; i++) {
|
|
305
|
+
fractions[i] = static_cast<double>(i) / (num - 1);
|
|
306
|
+
}
|
|
307
|
+
if (num > 1) {
|
|
308
|
+
fractions[num - 1] = 1.0;
|
|
309
|
+
}
|
|
310
|
+
return get_quantiles(fractions.data(), num);
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
template<typename T, typename C, typename S, typename A>
|
|
314
|
+
double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
|
|
315
|
+
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
|
|
316
|
+
uint8_t level = 0;
|
|
317
|
+
uint64_t weight = 1;
|
|
318
|
+
uint64_t total = 0;
|
|
319
|
+
while (level < num_levels_) {
|
|
320
|
+
const auto from_index(levels_[level]);
|
|
321
|
+
const auto to_index(levels_[level + 1]); // exclusive
|
|
322
|
+
for (uint32_t i = from_index; i < to_index; i++) {
|
|
323
|
+
if (C()(items_[i], value)) {
|
|
324
|
+
total += weight;
|
|
325
|
+
} else if ((level > 0) || is_level_zero_sorted_) {
|
|
326
|
+
break; // levels above 0 are sorted, no point comparing further
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
level++;
|
|
330
|
+
weight *= 2;
|
|
331
|
+
}
|
|
332
|
+
return (double) total / n_;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
template<typename T, typename C, typename S, typename A>
|
|
336
|
+
vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
|
|
337
|
+
return get_PMF_or_CDF(split_points, size, false);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
template<typename T, typename C, typename S, typename A>
|
|
341
|
+
vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
|
|
342
|
+
return get_PMF_or_CDF(split_points, size, true);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
template<typename T, typename C, typename S, typename A>
|
|
346
|
+
double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
|
|
347
|
+
return get_normalized_rank_error(min_k_, pmf);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
351
|
+
template<typename T, typename C, typename S, typename A>
|
|
352
|
+
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
353
|
+
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
|
354
|
+
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
|
355
|
+
if (num_levels_ == 1 && get_num_retained() == 1) {
|
|
356
|
+
return DATA_START_SINGLE_ITEM + sizeof(TT);
|
|
357
|
+
}
|
|
358
|
+
// the last integer in the levels_ array is not serialized because it can be derived
|
|
359
|
+
return DATA_START + num_levels_ * sizeof(uint32_t) + (get_num_retained() + 2) * sizeof(TT);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// implementation for all other types
|
|
363
|
+
template<typename T, typename C, typename S, typename A>
|
|
364
|
+
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
365
|
+
size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
|
|
366
|
+
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
|
367
|
+
if (num_levels_ == 1 && get_num_retained() == 1) {
|
|
368
|
+
return DATA_START_SINGLE_ITEM + S().size_of_item(items_[levels_[0]]);
|
|
369
|
+
}
|
|
370
|
+
// the last integer in the levels_ array is not serialized because it can be derived
|
|
371
|
+
size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
|
|
372
|
+
size += S().size_of_item(*min_value_);
|
|
373
|
+
size += S().size_of_item(*max_value_);
|
|
374
|
+
for (auto& it: *this) size += S().size_of_item(it.first);
|
|
375
|
+
return size;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
template<typename T, typename C, typename S, typename A>
|
|
379
|
+
void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
|
|
380
|
+
const bool is_single_item = n_ == 1;
|
|
381
|
+
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
|
382
|
+
os.write(reinterpret_cast<const char*>(&preamble_ints), sizeof(preamble_ints));
|
|
383
|
+
const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
|
|
384
|
+
os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
|
|
385
|
+
const uint8_t family(FAMILY);
|
|
386
|
+
os.write(reinterpret_cast<const char*>(&family), sizeof(family));
|
|
387
|
+
const uint8_t flags_byte(
|
|
388
|
+
(is_empty() ? 1 << flags::IS_EMPTY : 0)
|
|
389
|
+
| (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
|
|
390
|
+
| (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
|
|
391
|
+
);
|
|
392
|
+
os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
|
|
393
|
+
os.write((char*)&k_, sizeof(k_));
|
|
394
|
+
os.write((char*)&m_, sizeof(m_));
|
|
395
|
+
const uint8_t unused = 0;
|
|
396
|
+
os.write(reinterpret_cast<const char*>(&unused), sizeof(unused));
|
|
397
|
+
if (is_empty()) return;
|
|
398
|
+
if (!is_single_item) {
|
|
399
|
+
os.write((char*)&n_, sizeof(n_));
|
|
400
|
+
os.write((char*)&min_k_, sizeof(min_k_));
|
|
401
|
+
os.write((char*)&num_levels_, sizeof(num_levels_));
|
|
402
|
+
os.write((char*)&unused, sizeof(unused));
|
|
403
|
+
os.write((char*)levels_.data(), sizeof(levels_[0]) * num_levels_);
|
|
404
|
+
S().serialize(os, min_value_, 1);
|
|
405
|
+
S().serialize(os, max_value_, 1);
|
|
406
|
+
}
|
|
407
|
+
S().serialize(os, &items_[levels_[0]], get_num_retained());
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
template<typename T, typename C, typename S, typename A>
|
|
411
|
+
vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const {
|
|
412
|
+
const bool is_single_item = n_ == 1;
|
|
413
|
+
const size_t size = header_size_bytes + get_serialized_size_bytes();
|
|
414
|
+
vector_u8<A> bytes(size);
|
|
415
|
+
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
416
|
+
const uint8_t* end_ptr = ptr + size;
|
|
417
|
+
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
|
418
|
+
ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
|
|
419
|
+
const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
|
|
420
|
+
ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
|
|
421
|
+
const uint8_t family(FAMILY);
|
|
422
|
+
ptr += copy_to_mem(&family, ptr, sizeof(family));
|
|
423
|
+
const uint8_t flags_byte(
|
|
424
|
+
(is_empty() ? 1 << flags::IS_EMPTY : 0)
|
|
425
|
+
| (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
|
|
426
|
+
| (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
|
|
427
|
+
);
|
|
428
|
+
ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
|
|
429
|
+
ptr += copy_to_mem(&k_, ptr, sizeof(k_));
|
|
430
|
+
ptr += copy_to_mem(&m_, ptr, sizeof(m_));
|
|
431
|
+
const uint8_t unused = 0;
|
|
432
|
+
ptr += copy_to_mem(&unused, ptr, sizeof(unused));
|
|
433
|
+
if (!is_empty()) {
|
|
434
|
+
if (!is_single_item) {
|
|
435
|
+
ptr += copy_to_mem(&n_, ptr, sizeof(n_));
|
|
436
|
+
ptr += copy_to_mem(&min_k_, ptr, sizeof(min_k_));
|
|
437
|
+
ptr += copy_to_mem(&num_levels_, ptr, sizeof(num_levels_));
|
|
438
|
+
ptr += copy_to_mem(&unused, ptr, sizeof(unused));
|
|
439
|
+
ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
|
|
440
|
+
ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
|
|
441
|
+
ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
|
|
442
|
+
}
|
|
443
|
+
const size_t bytes_remaining = end_ptr - ptr;
|
|
444
|
+
ptr += S().serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
|
|
445
|
+
}
|
|
446
|
+
const size_t delta = ptr - bytes.data();
|
|
447
|
+
if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
|
|
448
|
+
return bytes;
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
template<typename T, typename C, typename S, typename A>
|
|
452
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
|
|
453
|
+
uint8_t preamble_ints;
|
|
454
|
+
is.read((char*)&preamble_ints, sizeof(preamble_ints));
|
|
455
|
+
uint8_t serial_version;
|
|
456
|
+
is.read((char*)&serial_version, sizeof(serial_version));
|
|
457
|
+
uint8_t family_id;
|
|
458
|
+
is.read((char*)&family_id, sizeof(family_id));
|
|
459
|
+
uint8_t flags_byte;
|
|
460
|
+
is.read((char*)&flags_byte, sizeof(flags_byte));
|
|
461
|
+
uint16_t k;
|
|
462
|
+
is.read((char*)&k, sizeof(k));
|
|
463
|
+
uint8_t m;
|
|
464
|
+
is.read((char*)&m, sizeof(m));
|
|
465
|
+
uint8_t unused;
|
|
466
|
+
is.read((char*)&unused, sizeof(unused));
|
|
467
|
+
|
|
468
|
+
check_m(m);
|
|
469
|
+
check_preamble_ints(preamble_ints, flags_byte);
|
|
470
|
+
check_serial_version(serial_version);
|
|
471
|
+
check_family_id(family_id);
|
|
472
|
+
|
|
473
|
+
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
474
|
+
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
475
|
+
if (is_empty) return kll_sketch(k);
|
|
476
|
+
|
|
477
|
+
uint64_t n;
|
|
478
|
+
uint16_t min_k;
|
|
479
|
+
uint8_t num_levels;
|
|
480
|
+
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM)); // used in serial version 2
|
|
481
|
+
if (is_single_item) {
|
|
482
|
+
n = 1;
|
|
483
|
+
min_k = k;
|
|
484
|
+
num_levels = 1;
|
|
485
|
+
} else {
|
|
486
|
+
is.read((char*)&n, sizeof(n_));
|
|
487
|
+
is.read((char*)&min_k, sizeof(min_k_));
|
|
488
|
+
is.read((char*)&num_levels, sizeof(num_levels));
|
|
489
|
+
is.read((char*)&unused, sizeof(unused));
|
|
490
|
+
}
|
|
491
|
+
vector_u32<A> levels(num_levels + 1);
|
|
492
|
+
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
|
493
|
+
if (is_single_item) {
|
|
494
|
+
levels[0] = capacity - 1;
|
|
495
|
+
} else {
|
|
496
|
+
// the last integer in levels_ is not serialized because it can be derived
|
|
497
|
+
is.read((char*)levels.data(), sizeof(levels[0]) * num_levels);
|
|
498
|
+
}
|
|
499
|
+
levels[num_levels] = capacity;
|
|
500
|
+
auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
|
|
501
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
|
|
502
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
|
|
503
|
+
std::unique_ptr<T, item_deleter> min_value;
|
|
504
|
+
std::unique_ptr<T, item_deleter> max_value;
|
|
505
|
+
if (!is_single_item) {
|
|
506
|
+
S().deserialize(is, min_value_buffer.get(), 1);
|
|
507
|
+
// serde call did not throw, repackage with destrtuctor
|
|
508
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
|
509
|
+
S().deserialize(is, max_value_buffer.get(), 1);
|
|
510
|
+
// serde call did not throw, repackage with destrtuctor
|
|
511
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
|
512
|
+
}
|
|
513
|
+
auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
|
|
514
|
+
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
|
|
515
|
+
const auto num_items = levels[num_levels] - levels[0];
|
|
516
|
+
S().deserialize(is, &items_buffer.get()[levels[0]], num_items);
|
|
517
|
+
// serde call did not throw, repackage with destrtuctors
|
|
518
|
+
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
|
|
519
|
+
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
|
520
|
+
if (is_single_item) {
|
|
521
|
+
new (min_value_buffer.get()) T(items.get()[levels[0]]);
|
|
522
|
+
// copy did not throw, repackage with destrtuctor
|
|
523
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
|
524
|
+
new (max_value_buffer.get()) T(items.get()[levels[0]]);
|
|
525
|
+
// copy did not throw, repackage with destrtuctor
|
|
526
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
|
527
|
+
}
|
|
528
|
+
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
529
|
+
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
|
530
|
+
std::move(min_value), std::move(max_value), is_level_zero_sorted);
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
template<typename T, typename C, typename S, typename A>
|
|
534
|
+
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size) {
|
|
535
|
+
ensure_minimum_memory(size, 8);
|
|
536
|
+
const char* ptr = static_cast<const char*>(bytes);
|
|
537
|
+
uint8_t preamble_ints;
|
|
538
|
+
ptr += copy_from_mem(ptr, &preamble_ints, sizeof(preamble_ints));
|
|
539
|
+
uint8_t serial_version;
|
|
540
|
+
ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
|
|
541
|
+
uint8_t family_id;
|
|
542
|
+
ptr += copy_from_mem(ptr, &family_id, sizeof(family_id));
|
|
543
|
+
uint8_t flags_byte;
|
|
544
|
+
ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
|
|
545
|
+
uint16_t k;
|
|
546
|
+
ptr += copy_from_mem(ptr, &k, sizeof(k));
|
|
547
|
+
uint8_t m;
|
|
548
|
+
ptr += copy_from_mem(ptr, &m, sizeof(m));
|
|
549
|
+
ptr++; // skip unused byte
|
|
550
|
+
|
|
551
|
+
check_m(m);
|
|
552
|
+
check_preamble_ints(preamble_ints, flags_byte);
|
|
553
|
+
check_serial_version(serial_version);
|
|
554
|
+
check_family_id(family_id);
|
|
555
|
+
ensure_minimum_memory(size, 1 << preamble_ints);
|
|
556
|
+
|
|
557
|
+
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
558
|
+
if (is_empty) return kll_sketch<T, C, S, A>(k);
|
|
559
|
+
|
|
560
|
+
uint64_t n;
|
|
561
|
+
uint16_t min_k;
|
|
562
|
+
uint8_t num_levels;
|
|
563
|
+
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM)); // used in serial version 2
|
|
564
|
+
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
|
565
|
+
if (is_single_item) {
|
|
566
|
+
n = 1;
|
|
567
|
+
min_k = k;
|
|
568
|
+
num_levels = 1;
|
|
569
|
+
} else {
|
|
570
|
+
ptr += copy_from_mem(ptr, &n, sizeof(n));
|
|
571
|
+
ptr += copy_from_mem(ptr, &min_k, sizeof(min_k));
|
|
572
|
+
ptr += copy_from_mem(ptr, &num_levels, sizeof(num_levels));
|
|
573
|
+
ptr++; // skip unused byte
|
|
574
|
+
}
|
|
575
|
+
vector_u32<A> levels(num_levels + 1);
|
|
576
|
+
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
|
577
|
+
if (is_single_item) {
|
|
578
|
+
levels[0] = capacity - 1;
|
|
579
|
+
} else {
|
|
580
|
+
// the last integer in levels_ is not serialized because it can be derived
|
|
581
|
+
ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
|
|
582
|
+
}
|
|
583
|
+
levels[num_levels] = capacity;
|
|
584
|
+
auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
|
|
585
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
|
|
586
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
|
|
587
|
+
std::unique_ptr<T, item_deleter> min_value;
|
|
588
|
+
std::unique_ptr<T, item_deleter> max_value;
|
|
589
|
+
if (!is_single_item) {
|
|
590
|
+
ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
|
|
591
|
+
// serde call did not throw, repackage with destrtuctor
|
|
592
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
|
593
|
+
ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
|
|
594
|
+
// serde call did not throw, repackage with destrtuctor
|
|
595
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
|
596
|
+
}
|
|
597
|
+
auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
|
|
598
|
+
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
|
|
599
|
+
const auto num_items = levels[num_levels] - levels[0];
|
|
600
|
+
ptr += S().deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
|
|
601
|
+
// serde call did not throw, repackage with destrtuctors
|
|
602
|
+
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
|
|
603
|
+
const size_t delta = ptr - static_cast<const char*>(bytes);
|
|
604
|
+
if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
|
|
605
|
+
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
|
606
|
+
if (is_single_item) {
|
|
607
|
+
new (min_value_buffer.get()) T(items.get()[levels[0]]);
|
|
608
|
+
// copy did not throw, repackage with destrtuctor
|
|
609
|
+
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
|
|
610
|
+
new (max_value_buffer.get()) T(items.get()[levels[0]]);
|
|
611
|
+
// copy did not throw, repackage with destrtuctor
|
|
612
|
+
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
|
|
613
|
+
}
|
|
614
|
+
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
|
615
|
+
std::move(min_value), std::move(max_value), is_level_zero_sorted);
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
/*
|
|
619
|
+
* Gets the normalized rank error given k and pmf.
|
|
620
|
+
* k - the configuration parameter
|
|
621
|
+
* pmf - if true, returns the "double-sided" normalized rank error for the get_PMF() function.
|
|
622
|
+
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
|
|
623
|
+
* Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
|
|
624
|
+
*/
|
|
625
|
+
template<typename T, typename C, typename S, typename A>
|
|
626
|
+
double kll_sketch<T, C, S, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
|
|
627
|
+
return pmf
|
|
628
|
+
? 2.446 / pow(k, 0.9433)
|
|
629
|
+
: 2.296 / pow(k, 0.9723);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
// for deserialization
|
|
633
|
+
template<typename T, typename C, typename S, typename A>
|
|
634
|
+
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
|
|
635
|
+
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
|
|
636
|
+
std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
|
|
637
|
+
k_(k),
|
|
638
|
+
m_(DEFAULT_M),
|
|
639
|
+
min_k_(min_k),
|
|
640
|
+
n_(n),
|
|
641
|
+
num_levels_(num_levels),
|
|
642
|
+
levels_(std::move(levels)),
|
|
643
|
+
items_(items.release()),
|
|
644
|
+
items_size_(items_size),
|
|
645
|
+
min_value_(min_value.release()),
|
|
646
|
+
max_value_(max_value.release()),
|
|
647
|
+
is_level_zero_sorted_(is_level_zero_sorted)
|
|
648
|
+
{}
|
|
649
|
+
|
|
650
|
+
// The following code is only valid in the special case of exactly reaching capacity while updating.
|
|
651
|
+
// It cannot be used while merging, while reducing k, or anything else.
|
|
652
|
+
template<typename T, typename C, typename S, typename A>
|
|
653
|
+
void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
654
|
+
const uint8_t level = find_level_to_compact();
|
|
655
|
+
|
|
656
|
+
// It is important to add the new top level right here. Be aware that this operation
|
|
657
|
+
// grows the buffer and shifts the data and also the boundaries of the data and grows the
|
|
658
|
+
// levels array and increments num_levels_
|
|
659
|
+
if (level == (num_levels_ - 1)) {
|
|
660
|
+
add_empty_top_level_to_completely_full_sketch();
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
const uint32_t raw_beg = levels_[level];
|
|
664
|
+
const uint32_t raw_lim = levels_[level + 1];
|
|
665
|
+
// +2 is OK because we already added a new top level if necessary
|
|
666
|
+
const uint32_t pop_above = levels_[level + 2] - raw_lim;
|
|
667
|
+
const uint32_t raw_pop = raw_lim - raw_beg;
|
|
668
|
+
const bool odd_pop = kll_helper::is_odd(raw_pop);
|
|
669
|
+
const uint32_t adj_beg = odd_pop ? raw_beg + 1 : raw_beg;
|
|
670
|
+
const uint32_t adj_pop = odd_pop ? raw_pop - 1 : raw_pop;
|
|
671
|
+
const uint32_t half_adj_pop = adj_pop / 2;
|
|
672
|
+
const uint32_t destroy_beg = levels_[0];
|
|
673
|
+
|
|
674
|
+
// level zero might not be sorted, so we must sort it if we wish to compact it
|
|
675
|
+
// sort_level_zero() is not used here because of the adjustment for odd number of items
|
|
676
|
+
if ((level == 0) && !is_level_zero_sorted_) {
|
|
677
|
+
std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
|
|
678
|
+
}
|
|
679
|
+
if (pop_above == 0) {
|
|
680
|
+
kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
|
|
681
|
+
} else {
|
|
682
|
+
kll_helper::randomly_halve_down(items_, adj_beg, adj_pop);
|
|
683
|
+
kll_helper::merge_sorted_arrays<T, C>(items_, adj_beg, half_adj_pop, raw_lim, pop_above, adj_beg + half_adj_pop);
|
|
684
|
+
}
|
|
685
|
+
levels_[level + 1] -= half_adj_pop; // adjust boundaries of the level above
|
|
686
|
+
if (odd_pop) {
|
|
687
|
+
levels_[level] = levels_[level + 1] - 1; // the current level now contains one item
|
|
688
|
+
if (levels_[level] != raw_beg) items_[levels_[level]] = std::move(items_[raw_beg]); // namely this leftover guy
|
|
689
|
+
} else {
|
|
690
|
+
levels_[level] = levels_[level + 1]; // the current level is now empty
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
// verify that we freed up half_adj_pop array slots just below the current level
|
|
694
|
+
if (levels_[level] != (raw_beg + half_adj_pop)) throw std::logic_error("compaction error");
|
|
695
|
+
|
|
696
|
+
// finally, we need to shift up the data in the levels below
|
|
697
|
+
// so that the freed-up space can be used by level zero
|
|
698
|
+
if (level > 0) {
|
|
699
|
+
const uint32_t amount = raw_beg - levels_[0];
|
|
700
|
+
std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
|
|
701
|
+
for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
|
|
702
|
+
}
|
|
703
|
+
for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
template<typename T, typename C, typename S, typename A>
|
|
707
|
+
uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
|
|
708
|
+
uint8_t level = 0;
|
|
709
|
+
while (true) {
|
|
710
|
+
if (level >= num_levels_) throw std::logic_error("capacity calculation error");
|
|
711
|
+
const uint32_t pop = levels_[level + 1] - levels_[level];
|
|
712
|
+
const uint32_t cap = kll_helper::level_capacity(k_, num_levels_, level, m_);
|
|
713
|
+
if (pop >= cap) {
|
|
714
|
+
return level;
|
|
715
|
+
}
|
|
716
|
+
level++;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
template<typename T, typename C, typename S, typename A>
|
|
721
|
+
void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
722
|
+
const uint32_t cur_total_cap = levels_[num_levels_];
|
|
723
|
+
|
|
724
|
+
// make sure that we are following a certain growth scheme
|
|
725
|
+
if (levels_[0] != 0) throw std::logic_error("full sketch expected");
|
|
726
|
+
if (items_size_ != cur_total_cap) throw std::logic_error("current capacity mismatch");
|
|
727
|
+
|
|
728
|
+
// note that merging MIGHT over-grow levels_, in which case we might not have to grow it here
|
|
729
|
+
const uint8_t new_levels_size = num_levels_ + 2;
|
|
730
|
+
if (levels_.size() < new_levels_size) {
|
|
731
|
+
levels_.resize(new_levels_size);
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
const uint32_t delta_cap = kll_helper::level_capacity(k_, num_levels_ + 1, 0, m_);
|
|
735
|
+
const uint32_t new_total_cap = cur_total_cap + delta_cap;
|
|
736
|
+
|
|
737
|
+
// move (and shift) the current data into the new buffer
|
|
738
|
+
T* new_buf = A().allocate(new_total_cap);
|
|
739
|
+
kll_helper::move_construct<T>(items_, 0, cur_total_cap, new_buf, delta_cap, true);
|
|
740
|
+
A().deallocate(items_, items_size_);
|
|
741
|
+
items_ = new_buf;
|
|
742
|
+
items_size_ = new_total_cap;
|
|
743
|
+
|
|
744
|
+
// this loop includes the old "extra" index at the top
|
|
745
|
+
for (uint8_t i = 0; i <= num_levels_; i++) {
|
|
746
|
+
levels_[i] += delta_cap;
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
if (levels_[num_levels_] != new_total_cap) throw std::logic_error("new capacity mismatch");
|
|
750
|
+
|
|
751
|
+
num_levels_++;
|
|
752
|
+
levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
template<typename T, typename C, typename S, typename A>
|
|
756
|
+
void kll_sketch<T, C, S, A>::sort_level_zero() {
|
|
757
|
+
if (!is_level_zero_sorted_) {
|
|
758
|
+
std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
|
|
759
|
+
is_level_zero_sorted_ = true;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
template<typename T, typename C, typename S, typename A>
|
|
764
|
+
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> kll_sketch<T, C, S, A>::get_quantile_calculator() {
|
|
765
|
+
sort_level_zero();
|
|
766
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>> AllocCalc;
|
|
767
|
+
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
|
|
768
|
+
new (AllocCalc().allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_),
|
|
769
|
+
[](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); AllocCalc().deallocate(ptr, 1); }
|
|
770
|
+
);
|
|
771
|
+
return quantile_calculator;
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
template<typename T, typename C, typename S, typename A>
|
|
775
|
+
vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
|
|
776
|
+
if (is_empty()) return vector_d<A>();
|
|
777
|
+
kll_helper::validate_values<T, C>(split_points, size);
|
|
778
|
+
vector_d<A> buckets(size + 1, 0);
|
|
779
|
+
uint8_t level = 0;
|
|
780
|
+
uint64_t weight = 1;
|
|
781
|
+
while (level < num_levels_) {
|
|
782
|
+
const auto from_index = levels_[level];
|
|
783
|
+
const auto to_index = levels_[level + 1]; // exclusive
|
|
784
|
+
if ((level == 0) && !is_level_zero_sorted_) {
|
|
785
|
+
increment_buckets_unsorted_level(from_index, to_index, weight, split_points, size, buckets.data());
|
|
786
|
+
} else {
|
|
787
|
+
increment_buckets_sorted_level(from_index, to_index, weight, split_points, size, buckets.data());
|
|
788
|
+
}
|
|
789
|
+
level++;
|
|
790
|
+
weight *= 2;
|
|
791
|
+
}
|
|
792
|
+
// normalize and, if CDF, convert to cumulative
|
|
793
|
+
if (is_CDF) {
|
|
794
|
+
double subtotal = 0;
|
|
795
|
+
for (uint32_t i = 0; i <= size; i++) {
|
|
796
|
+
subtotal += buckets[i];
|
|
797
|
+
buckets[i] = subtotal / n_;
|
|
798
|
+
}
|
|
799
|
+
} else {
|
|
800
|
+
for (uint32_t i = 0; i <= size; i++) {
|
|
801
|
+
buckets[i] /= n_;
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
return buckets;
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
template<typename T, typename C, typename S, typename A>
|
|
808
|
+
void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
809
|
+
const T* split_points, uint32_t size, double* buckets) const
|
|
810
|
+
{
|
|
811
|
+
for (uint32_t i = from_index; i < to_index; i++) {
|
|
812
|
+
uint32_t j;
|
|
813
|
+
for (j = 0; j < size; j++) {
|
|
814
|
+
if (C()(items_[i], split_points[j])) {
|
|
815
|
+
break;
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
buckets[j] += weight;
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
template<typename T, typename C, typename S, typename A>
|
|
823
|
+
void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
824
|
+
const T* split_points, uint32_t size, double* buckets) const
|
|
825
|
+
{
|
|
826
|
+
uint32_t i = from_index;
|
|
827
|
+
uint32_t j = 0;
|
|
828
|
+
while ((i < to_index) && (j < size)) {
|
|
829
|
+
if (C()(items_[i], split_points[j])) {
|
|
830
|
+
buckets[j] += weight; // this sample goes into this bucket
|
|
831
|
+
i++; // move on to next sample and see whether it also goes into this bucket
|
|
832
|
+
} else {
|
|
833
|
+
j++; // no more samples for this bucket
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
// now either i == to_index (we are out of samples), or
|
|
837
|
+
// j == size (we are out of buckets, but there are more samples remaining)
|
|
838
|
+
// we only need to do something in the latter case
|
|
839
|
+
if (j == size) {
|
|
840
|
+
buckets[j] += weight * (to_index - i);
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
template<typename T, typename C, typename S, typename A>
|
|
845
|
+
template<typename O>
|
|
846
|
+
void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
|
847
|
+
const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
|
|
848
|
+
auto tmp_items_deleter = [tmp_num_items](T* ptr) { A().deallocate(ptr, tmp_num_items); }; // no destructor needed
|
|
849
|
+
const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(A().allocate(tmp_num_items), tmp_items_deleter);
|
|
850
|
+
const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
|
|
851
|
+
const size_t work_levels_size = ub + 2; // ub+1 does not work
|
|
852
|
+
vector_u32<A> worklevels(work_levels_size);
|
|
853
|
+
vector_u32<A> outlevels(work_levels_size);
|
|
854
|
+
|
|
855
|
+
const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
|
|
856
|
+
|
|
857
|
+
populate_work_arrays(std::forward<O>(other), workbuf.get(), worklevels.data(), provisional_num_levels);
|
|
858
|
+
|
|
859
|
+
const kll_helper::compress_result result = kll_helper::general_compress<T, C>(k_, m_, provisional_num_levels, workbuf.get(),
|
|
860
|
+
worklevels.data(), outlevels.data(), is_level_zero_sorted_);
|
|
861
|
+
|
|
862
|
+
// ub can sometimes be much bigger
|
|
863
|
+
if (result.final_num_levels > ub) throw std::logic_error("merge error");
|
|
864
|
+
|
|
865
|
+
// now we need to transfer the results back into "this" sketch
|
|
866
|
+
if (result.final_capacity != items_size_) {
|
|
867
|
+
A().deallocate(items_, items_size_);
|
|
868
|
+
items_size_ = result.final_capacity;
|
|
869
|
+
items_ = A().allocate(items_size_);
|
|
870
|
+
}
|
|
871
|
+
const uint32_t free_space_at_bottom = result.final_capacity - result.final_num_items;
|
|
872
|
+
kll_helper::move_construct<T>(workbuf.get(), outlevels[0], outlevels[0] + result.final_num_items, items_, free_space_at_bottom, true);
|
|
873
|
+
|
|
874
|
+
const size_t new_levels_size = result.final_num_levels + 1;
|
|
875
|
+
if (levels_.size() < new_levels_size) {
|
|
876
|
+
levels_.resize(new_levels_size);
|
|
877
|
+
}
|
|
878
|
+
const uint32_t offset = free_space_at_bottom - outlevels[0];
|
|
879
|
+
for (uint8_t lvl = 0; lvl < levels_.size(); lvl++) { // includes the "extra" index
|
|
880
|
+
levels_[lvl] = outlevels[lvl] + offset;
|
|
881
|
+
}
|
|
882
|
+
num_levels_ = result.final_num_levels;
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
// this leaves items_ uninitialized (all objects moved out and destroyed)
|
|
886
|
+
// this version copies objects from the incoming sketch
|
|
887
|
+
template<typename T, typename C, typename S, typename A>
|
|
888
|
+
void kll_sketch<T, C, S, A>::populate_work_arrays(const kll_sketch& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
|
|
889
|
+
worklevels[0] = 0;
|
|
890
|
+
|
|
891
|
+
// the level zero data from "other" was already inserted into "this"
|
|
892
|
+
kll_helper::move_construct<T>(items_, levels_[0], levels_[1], workbuf, 0, true);
|
|
893
|
+
worklevels[1] = safe_level_size(0);
|
|
894
|
+
|
|
895
|
+
for (uint8_t lvl = 1; lvl < provisional_num_levels; lvl++) {
|
|
896
|
+
const uint32_t self_pop = safe_level_size(lvl);
|
|
897
|
+
const uint32_t other_pop = other.safe_level_size(lvl);
|
|
898
|
+
worklevels[lvl + 1] = worklevels[lvl] + self_pop + other_pop;
|
|
899
|
+
|
|
900
|
+
if ((self_pop > 0) && (other_pop == 0)) {
|
|
901
|
+
kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
|
|
902
|
+
} else if ((self_pop == 0) && (other_pop > 0)) {
|
|
903
|
+
kll_helper::copy_construct<T>(other.items_, other.levels_[lvl], other.levels_[lvl] + other_pop, workbuf, worklevels[lvl]);
|
|
904
|
+
} else if ((self_pop > 0) && (other_pop > 0)) {
|
|
905
|
+
kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
// this leaves items_ uninitialized (all objects moved out and destroyed)
|
|
911
|
+
// this version moves objects from the incoming sketch
|
|
912
|
+
template<typename T, typename C, typename S, typename A>
|
|
913
|
+
void kll_sketch<T, C, S, A>::populate_work_arrays(kll_sketch&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
|
|
914
|
+
worklevels[0] = 0;
|
|
915
|
+
|
|
916
|
+
// the level zero data from "other" was already inserted into "this"
|
|
917
|
+
kll_helper::move_construct<T>(items_, levels_[0], levels_[1], workbuf, 0, true);
|
|
918
|
+
worklevels[1] = safe_level_size(0);
|
|
919
|
+
|
|
920
|
+
for (uint8_t lvl = 1; lvl < provisional_num_levels; lvl++) {
|
|
921
|
+
const uint32_t self_pop = safe_level_size(lvl);
|
|
922
|
+
const uint32_t other_pop = other.safe_level_size(lvl);
|
|
923
|
+
worklevels[lvl + 1] = worklevels[lvl] + self_pop + other_pop;
|
|
924
|
+
|
|
925
|
+
if ((self_pop > 0) && (other_pop == 0)) {
|
|
926
|
+
kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
|
|
927
|
+
} else if ((self_pop == 0) && (other_pop > 0)) {
|
|
928
|
+
kll_helper::move_construct<T>(other.items_, other.levels_[lvl], other.levels_[lvl] + other_pop, workbuf, worklevels[lvl], false);
|
|
929
|
+
} else if ((self_pop > 0) && (other_pop > 0)) {
|
|
930
|
+
kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
template<typename T, typename C, typename S, typename A>
|
|
936
|
+
void kll_sketch<T, C, S, A>::assert_correct_total_weight() const {
|
|
937
|
+
const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
|
|
938
|
+
if (total != n_) {
|
|
939
|
+
throw std::logic_error("Total weight does not match N");
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
template<typename T, typename C, typename S, typename A>
|
|
944
|
+
uint32_t kll_sketch<T, C, S, A>::safe_level_size(uint8_t level) const {
|
|
945
|
+
if (level >= num_levels_) return 0;
|
|
946
|
+
return levels_[level + 1] - levels_[level];
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
template<typename T, typename C, typename S, typename A>
|
|
950
|
+
uint32_t kll_sketch<T, C, S, A>::get_num_retained_above_level_zero() const {
|
|
951
|
+
if (num_levels_ == 1) return 0;
|
|
952
|
+
return levels_[num_levels_] - levels_[1];
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
template<typename T, typename C, typename S, typename A>
|
|
956
|
+
void kll_sketch<T, C, S, A>::check_m(uint8_t m) {
|
|
957
|
+
if (m != DEFAULT_M) {
|
|
958
|
+
throw std::invalid_argument("Possible corruption: M must be " + std::to_string(DEFAULT_M)
|
|
959
|
+
+ ": " + std::to_string(m));
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
template<typename T, typename C, typename S, typename A>
|
|
964
|
+
void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
|
|
965
|
+
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
966
|
+
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
|
|
967
|
+
if (is_empty || is_single_item) {
|
|
968
|
+
if (preamble_ints != PREAMBLE_INTS_SHORT) {
|
|
969
|
+
throw std::invalid_argument("Possible corruption: preamble ints must be "
|
|
970
|
+
+ std::to_string(PREAMBLE_INTS_SHORT) + " for an empty or single item sketch: " + std::to_string(preamble_ints));
|
|
971
|
+
}
|
|
972
|
+
} else {
|
|
973
|
+
if (preamble_ints != PREAMBLE_INTS_FULL) {
|
|
974
|
+
throw std::invalid_argument("Possible corruption: preamble ints must be "
|
|
975
|
+
+ std::to_string(PREAMBLE_INTS_FULL) + " for a sketch with more than one item: " + std::to_string(preamble_ints));
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
template<typename T, typename C, typename S, typename A>
|
|
981
|
+
void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
|
|
982
|
+
if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
|
|
983
|
+
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
|
|
984
|
+
+ std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
|
|
985
|
+
+ ", got " + std::to_string(serial_version));
|
|
986
|
+
}
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
template<typename T, typename C, typename S, typename A>
|
|
990
|
+
void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
|
|
991
|
+
if (family_id != FAMILY) {
|
|
992
|
+
throw std::invalid_argument("Possible corruption: family mismatch: expected "
|
|
993
|
+
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
template <typename T, typename C, typename S, typename A>
|
|
998
|
+
string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
|
|
999
|
+
std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
|
|
1000
|
+
os << "### KLL sketch summary:" << std::endl;
|
|
1001
|
+
os << " K : " << k_ << std::endl;
|
|
1002
|
+
os << " min K : " << min_k_ << std::endl;
|
|
1003
|
+
os << " M : " << (unsigned int) m_ << std::endl;
|
|
1004
|
+
os << " N : " << n_ << std::endl;
|
|
1005
|
+
os << " Epsilon : " << std::setprecision(3) << get_normalized_rank_error(false) * 100 << "%" << std::endl;
|
|
1006
|
+
os << " Epsilon PMF : " << get_normalized_rank_error(true) * 100 << "%" << std::endl;
|
|
1007
|
+
os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
|
|
1008
|
+
os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
|
|
1009
|
+
os << " Levels : " << (unsigned int) num_levels_ << std::endl;
|
|
1010
|
+
os << " Sorted : " << (is_level_zero_sorted_ ? "true" : "false") << std::endl;
|
|
1011
|
+
os << " Capacity items : " << items_size_ << std::endl;
|
|
1012
|
+
os << " Retained items : " << get_num_retained() << std::endl;
|
|
1013
|
+
os << " Storage bytes : " << get_serialized_size_bytes() << std::endl;
|
|
1014
|
+
if (!is_empty()) {
|
|
1015
|
+
os << " Min value : " << *min_value_ << std::endl;
|
|
1016
|
+
os << " Max value : " << *max_value_ << std::endl;
|
|
1017
|
+
}
|
|
1018
|
+
os << "### End sketch summary" << std::endl;
|
|
1019
|
+
|
|
1020
|
+
if (print_levels) {
|
|
1021
|
+
os << "### KLL sketch levels:" << std::endl;
|
|
1022
|
+
os << " index: nominal capacity, actual size" << std::endl;
|
|
1023
|
+
for (uint8_t i = 0; i < num_levels_; i++) {
|
|
1024
|
+
os << " " << (unsigned int) i << ": " << kll_helper::level_capacity(k_, num_levels_, i, m_) << ", " << safe_level_size(i) << std::endl;
|
|
1025
|
+
}
|
|
1026
|
+
os << "### End sketch levels" << std::endl;
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
if (print_items) {
|
|
1030
|
+
os << "### KLL sketch data:" << std::endl;
|
|
1031
|
+
uint8_t level = 0;
|
|
1032
|
+
while (level < num_levels_) {
|
|
1033
|
+
const uint32_t from_index = levels_[level];
|
|
1034
|
+
const uint32_t to_index = levels_[level + 1]; // exclusive
|
|
1035
|
+
if (from_index < to_index) {
|
|
1036
|
+
os << " level " << (unsigned int) level << ":" << std::endl;
|
|
1037
|
+
}
|
|
1038
|
+
for (uint32_t i = from_index; i < to_index; i++) {
|
|
1039
|
+
os << " " << items_[i] << std::endl;
|
|
1040
|
+
}
|
|
1041
|
+
level++;
|
|
1042
|
+
}
|
|
1043
|
+
os << "### End sketch data" << std::endl;
|
|
1044
|
+
}
|
|
1045
|
+
return os.str();
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
template <typename T, typename C, typename S, typename A>
|
|
1049
|
+
typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin() const {
|
|
1050
|
+
return kll_sketch<T, C, S, A>::const_iterator(items_, levels_.data(), num_levels_);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1053
|
+
template <typename T, typename C, typename S, typename A>
|
|
1054
|
+
typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
|
|
1055
|
+
return kll_sketch<T, C, S, A>::const_iterator(nullptr, nullptr, num_levels_);
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
// kll_sketch::const_iterator implementation
|
|
1059
|
+
|
|
1060
|
+
template<typename T, typename C, typename S, typename A>
|
|
1061
|
+
kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
|
|
1062
|
+
items(items), levels(levels), num_levels(num_levels), index(levels == nullptr ? 0 : levels[0]), level(levels == nullptr ? num_levels : 0), weight(1)
|
|
1063
|
+
{}
|
|
1064
|
+
|
|
1065
|
+
template<typename T, typename C, typename S, typename A>
|
|
1066
|
+
typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++() {
|
|
1067
|
+
++index;
|
|
1068
|
+
if (index == levels[level + 1]) { // go to the next non-empty level
|
|
1069
|
+
do {
|
|
1070
|
+
++level;
|
|
1071
|
+
weight *= 2;
|
|
1072
|
+
} while (level < num_levels && levels[level] == levels[level + 1]);
|
|
1073
|
+
}
|
|
1074
|
+
return *this;
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
// Post-increment: copies the current position into tmp, advances this iterator
// with the pre-increment operator, and returns tmp.
// NOTE(review): the return type is a reference (const_iterator&) but the returned
// object is the local variable tmp, so the caller receives a dangling reference.
// Returning by value would fix this, but it requires changing the declaration in
// the class as well, which is not visible here.
template<typename T, typename C, typename S, typename A>
|
|
1078
|
+
typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++(int) {
|
|
1079
|
+
const_iterator tmp(*this);
|
|
1080
|
+
operator++();
|
|
1081
|
+
// NOTE(review): tmp goes out of scope on return while a reference to it is handed out
return tmp;
|
|
1082
|
+
}
|
|
1083
|
+
|
|
1084
|
+
template<typename T, typename C, typename S, typename A>
|
|
1085
|
+
bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
|
|
1086
|
+
if (level != other.level) return false;
|
|
1087
|
+
if (level == num_levels) return true; // end
|
|
1088
|
+
return index == other.index;
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
template<typename T, typename C, typename S, typename A>
|
|
1092
|
+
bool kll_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
|
|
1093
|
+
return !operator==(other);
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
template<typename T, typename C, typename S, typename A>
|
|
1097
|
+
const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator::operator*() const {
|
|
1098
|
+
return std::pair<const T&, const uint64_t>(items[index], weight);
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1101
|
+
template<typename T, typename C, typename S, typename A>
|
|
1102
|
+
class kll_sketch<T, C, S, A>::item_deleter {
|
|
1103
|
+
public:
|
|
1104
|
+
void operator() (T* ptr) const {
|
|
1105
|
+
if (ptr != nullptr) {
|
|
1106
|
+
ptr->~T();
|
|
1107
|
+
A().deallocate(ptr, 1);
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
};
|
|
1111
|
+
|
|
1112
|
+
template<typename T, typename C, typename S, typename A>
|
|
1113
|
+
class kll_sketch<T, C, S, A>::items_deleter {
|
|
1114
|
+
public:
|
|
1115
|
+
items_deleter(uint32_t start, uint32_t num): start(start), num(num) {}
|
|
1116
|
+
void operator() (T* ptr) const {
|
|
1117
|
+
if (ptr != nullptr) {
|
|
1118
|
+
for (uint32_t i = start; i < num; ++i) {
|
|
1119
|
+
ptr[i].~T();
|
|
1120
|
+
}
|
|
1121
|
+
A().deallocate(ptr, num);
|
|
1122
|
+
}
|
|
1123
|
+
}
|
|
1124
|
+
private:
|
|
1125
|
+
uint32_t start;
|
|
1126
|
+
uint32_t num;
|
|
1127
|
+
};
|
|
1128
|
+
|
|
1129
|
+
} /* namespace datasketches */
|
|
1130
|
+
|
|
1131
|
+
#endif
|