datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLSKETCHIMPL_INTERNAL_HPP_
|
|
21
|
+
#define _HLLSKETCHIMPL_INTERNAL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "HllSketchImpl.hpp"
|
|
24
|
+
#include "HllSketchImplFactory.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
template<typename A>
|
|
29
|
+
HllSketchImpl<A>::HllSketchImpl(const int lgConfigK, const target_hll_type tgtHllType,
|
|
30
|
+
const hll_mode mode, const bool startFullSize)
|
|
31
|
+
: lgConfigK(lgConfigK),
|
|
32
|
+
tgtHllType(tgtHllType),
|
|
33
|
+
mode(mode),
|
|
34
|
+
startFullSize(startFullSize)
|
|
35
|
+
{
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
template<typename A>
|
|
39
|
+
HllSketchImpl<A>::~HllSketchImpl() {
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
template<typename A>
|
|
43
|
+
target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
|
|
44
|
+
switch ((modeByte >> 2) & 0x3) {
|
|
45
|
+
case 0:
|
|
46
|
+
return target_hll_type::HLL_4;
|
|
47
|
+
case 1:
|
|
48
|
+
return target_hll_type::HLL_6;
|
|
49
|
+
case 2:
|
|
50
|
+
return target_hll_type::HLL_8;
|
|
51
|
+
default:
|
|
52
|
+
throw std::invalid_argument("Invalid target HLL type");
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
template<typename A>
|
|
57
|
+
hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
|
|
58
|
+
switch (modeByte & 0x3) {
|
|
59
|
+
case 0:
|
|
60
|
+
return hll_mode::LIST;
|
|
61
|
+
case 1:
|
|
62
|
+
return hll_mode::SET;
|
|
63
|
+
case 2:
|
|
64
|
+
return hll_mode::HLL;
|
|
65
|
+
default:
|
|
66
|
+
throw std::invalid_argument("Invalid current sketch mode");
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
template<typename A>
|
|
71
|
+
uint8_t HllSketchImpl<A>::makeFlagsByte(const bool compact) const {
|
|
72
|
+
uint8_t flags(0);
|
|
73
|
+
flags |= (isEmpty() ? HllUtil<A>::EMPTY_FLAG_MASK : 0);
|
|
74
|
+
flags |= (compact ? HllUtil<A>::COMPACT_FLAG_MASK : 0);
|
|
75
|
+
flags |= (isOutOfOrderFlag() ? HllUtil<A>::OUT_OF_ORDER_FLAG_MASK : 0);
|
|
76
|
+
flags |= (startFullSize ? HllUtil<A>::FULL_SIZE_FLAG_MASK : 0);
|
|
77
|
+
return flags;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// lo2bits = curMode, next 2 bits = tgtHllType
|
|
81
|
+
// Dec Lo4Bits TgtHllType, CurMode
|
|
82
|
+
// 0 0000 HLL_4, LIST
|
|
83
|
+
// 1 0001 HLL_4, SET
|
|
84
|
+
// 2 0010 HLL_4, HLL
|
|
85
|
+
// 4 0100 HLL_6, LIST
|
|
86
|
+
// 5 0101 HLL_6, SET
|
|
87
|
+
// 6 0110 HLL_6, HLL
|
|
88
|
+
// 8 1000 HLL_8, LIST
|
|
89
|
+
// 9 1001 HLL_8, SET
|
|
90
|
+
// 10 1010 HLL_8, HLL
|
|
91
|
+
template<typename A>
|
|
92
|
+
uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
93
|
+
uint8_t byte = 0;
|
|
94
|
+
|
|
95
|
+
switch (mode) {
|
|
96
|
+
case LIST:
|
|
97
|
+
byte = 0;
|
|
98
|
+
break;
|
|
99
|
+
case SET:
|
|
100
|
+
byte = 1;
|
|
101
|
+
break;
|
|
102
|
+
case HLL:
|
|
103
|
+
byte = 2;
|
|
104
|
+
break;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
switch (tgtHllType) {
|
|
108
|
+
case HLL_4:
|
|
109
|
+
byte |= (0 << 2); // for completeness
|
|
110
|
+
break;
|
|
111
|
+
case HLL_6:
|
|
112
|
+
byte |= (1 << 2);
|
|
113
|
+
break;
|
|
114
|
+
case HLL_8:
|
|
115
|
+
byte |= (2 << 2);
|
|
116
|
+
break;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
return byte;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
template<typename A>
|
|
123
|
+
HllSketchImpl<A>* HllSketchImpl<A>::reset() {
|
|
124
|
+
return HllSketchImplFactory<A>::reset(this, startFullSize);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
template<typename A>
|
|
128
|
+
target_hll_type HllSketchImpl<A>::getTgtHllType() const {
|
|
129
|
+
return tgtHllType;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
template<typename A>
|
|
133
|
+
int HllSketchImpl<A>::getLgConfigK() const {
|
|
134
|
+
return lgConfigK;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
template<typename A>
|
|
138
|
+
hll_mode HllSketchImpl<A>::getCurMode() const {
|
|
139
|
+
return mode;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
template<typename A>
|
|
143
|
+
bool HllSketchImpl<A>::isStartFullSize() const {
|
|
144
|
+
return startFullSize;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
#endif // _HLLSKETCHIMPL_INTERNAL_HPP_
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLSKETCHIMPL_HPP_
|
|
21
|
+
#define _HLLSKETCHIMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "HllUtil.hpp"
|
|
24
|
+
#include "hll.hpp" // for TgtHllType
|
|
25
|
+
|
|
26
|
+
#include <memory>
|
|
27
|
+
|
|
28
|
+
namespace datasketches {
|
|
29
|
+
|
|
30
|
+
template<typename A = std::allocator<char>>
|
|
31
|
+
class HllSketchImpl {
|
|
32
|
+
public:
|
|
33
|
+
HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
|
|
34
|
+
virtual ~HllSketchImpl();
|
|
35
|
+
|
|
36
|
+
virtual void serialize(std::ostream& os, bool compact) const = 0;
|
|
37
|
+
virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const = 0;
|
|
38
|
+
|
|
39
|
+
virtual HllSketchImpl* copy() const = 0;
|
|
40
|
+
virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
|
|
41
|
+
HllSketchImpl<A>* reset();
|
|
42
|
+
|
|
43
|
+
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
|
|
44
|
+
|
|
45
|
+
virtual HllSketchImpl* couponUpdate(int coupon) = 0;
|
|
46
|
+
|
|
47
|
+
hll_mode getCurMode() const;
|
|
48
|
+
|
|
49
|
+
virtual double getEstimate() const = 0;
|
|
50
|
+
virtual double getCompositeEstimate() const = 0;
|
|
51
|
+
virtual double getUpperBound(int numStdDev) const = 0;
|
|
52
|
+
virtual double getLowerBound(int numStdDev) const = 0;
|
|
53
|
+
|
|
54
|
+
inline int getLgConfigK() const;
|
|
55
|
+
|
|
56
|
+
virtual int getMemDataStart() const = 0;
|
|
57
|
+
|
|
58
|
+
virtual int getPreInts() const = 0;
|
|
59
|
+
|
|
60
|
+
target_hll_type getTgtHllType() const;
|
|
61
|
+
|
|
62
|
+
virtual int getUpdatableSerializationBytes() const = 0;
|
|
63
|
+
virtual int getCompactSerializationBytes() const = 0;
|
|
64
|
+
|
|
65
|
+
virtual bool isCompact() const = 0;
|
|
66
|
+
virtual bool isEmpty() const = 0;
|
|
67
|
+
virtual bool isOutOfOrderFlag() const = 0;
|
|
68
|
+
virtual void putOutOfOrderFlag(bool oooFlag) = 0;
|
|
69
|
+
bool isStartFullSize() const;
|
|
70
|
+
|
|
71
|
+
protected:
|
|
72
|
+
static target_hll_type extractTgtHllType(uint8_t modeByte);
|
|
73
|
+
static hll_mode extractCurMode(uint8_t modeByte);
|
|
74
|
+
uint8_t makeFlagsByte(bool compact) const;
|
|
75
|
+
uint8_t makeModeByte() const;
|
|
76
|
+
|
|
77
|
+
const int lgConfigK;
|
|
78
|
+
const target_hll_type tgtHllType;
|
|
79
|
+
const hll_mode mode;
|
|
80
|
+
const bool startFullSize;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
#endif // _HLLSKETCHIMPL_HPP_
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLSKETCHIMPLFACTORY_HPP_
|
|
21
|
+
#define _HLLSKETCHIMPLFACTORY_HPP_
|
|
22
|
+
|
|
23
|
+
#include "HllUtil.hpp"
|
|
24
|
+
#include "HllSketchImpl.hpp"
|
|
25
|
+
#include "CouponList.hpp"
|
|
26
|
+
#include "CouponHashSet.hpp"
|
|
27
|
+
#include "HllArray.hpp"
|
|
28
|
+
#include "Hll4Array.hpp"
|
|
29
|
+
#include "Hll6Array.hpp"
|
|
30
|
+
#include "Hll8Array.hpp"
|
|
31
|
+
|
|
32
|
+
namespace datasketches {
|
|
33
|
+
|
|
34
|
+
template<typename A = std::allocator<char>>
|
|
35
|
+
class HllSketchImplFactory final {
|
|
36
|
+
public:
|
|
37
|
+
static HllSketchImpl<A>* deserialize(std::istream& os);
|
|
38
|
+
static HllSketchImpl<A>* deserialize(const void* bytes, size_t len);
|
|
39
|
+
|
|
40
|
+
static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
|
|
41
|
+
static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
|
|
42
|
+
static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false);
|
|
43
|
+
|
|
44
|
+
// resets the input impl, deleting the input pointer and returning a new pointer
|
|
45
|
+
static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
|
|
46
|
+
|
|
47
|
+
static Hll4Array<A>* convertToHll4(const HllArray<A>& srcHllArr);
|
|
48
|
+
static Hll6Array<A>* convertToHll6(const HllArray<A>& srcHllArr);
|
|
49
|
+
static Hll8Array<A>* convertToHll8(const HllArray<A>& srcHllArr);
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
template<typename A>
|
|
53
|
+
CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
|
|
54
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
|
|
55
|
+
CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType());
|
|
56
|
+
for (auto coupon: list) {
|
|
57
|
+
chSet->couponUpdate(coupon);
|
|
58
|
+
}
|
|
59
|
+
return chSet;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
template<typename A>
|
|
63
|
+
HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
|
|
64
|
+
HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType());
|
|
65
|
+
tgtHllArr->putKxQ0(1 << src.getLgConfigK());
|
|
66
|
+
for (auto coupon: src) {
|
|
67
|
+
tgtHllArr->couponUpdate(coupon);
|
|
68
|
+
}
|
|
69
|
+
tgtHllArr->putHipAccum(src.getEstimate());
|
|
70
|
+
tgtHllArr->putOutOfOrderFlag(false);
|
|
71
|
+
return tgtHllArr;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
template<typename A>
|
|
75
|
+
HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is) {
|
|
76
|
+
// we'll hand off the sketch based on PreInts so we don't need
|
|
77
|
+
// to move the stream pointer back and forth -- perhaps somewhat fragile?
|
|
78
|
+
const int preInts = is.peek();
|
|
79
|
+
if (preInts == HllUtil<A>::HLL_PREINTS) {
|
|
80
|
+
return HllArray<A>::newHll(is);
|
|
81
|
+
} else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
|
|
82
|
+
return CouponHashSet<A>::newSet(is);
|
|
83
|
+
} else if (preInts == HllUtil<A>::LIST_PREINTS) {
|
|
84
|
+
return CouponList<A>::newList(is);
|
|
85
|
+
} else {
|
|
86
|
+
throw std::invalid_argument("Attempt to deserialize unknown object type");
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
template<typename A>
|
|
91
|
+
HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len) {
|
|
92
|
+
// read current mode directly
|
|
93
|
+
const int preInts = static_cast<const uint8_t*>(bytes)[0];
|
|
94
|
+
if (preInts == HllUtil<A>::HLL_PREINTS) {
|
|
95
|
+
return HllArray<A>::newHll(bytes, len);
|
|
96
|
+
} else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
|
|
97
|
+
return CouponHashSet<A>::newSet(bytes, len);
|
|
98
|
+
} else if (preInts == HllUtil<A>::LIST_PREINTS) {
|
|
99
|
+
return CouponList<A>::newList(bytes, len);
|
|
100
|
+
} else {
|
|
101
|
+
throw std::invalid_argument("Attempt to deserialize unknown object type");
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
template<typename A>
|
|
106
|
+
HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize) {
|
|
107
|
+
switch (tgtHllType) {
|
|
108
|
+
case HLL_8:
|
|
109
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
|
|
110
|
+
return new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, startFullSize);
|
|
111
|
+
case HLL_6:
|
|
112
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
|
|
113
|
+
return new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, startFullSize);
|
|
114
|
+
case HLL_4:
|
|
115
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
|
|
116
|
+
return new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, startFullSize);
|
|
117
|
+
}
|
|
118
|
+
throw std::logic_error("Invalid target_hll_type");
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
template<typename A>
|
|
122
|
+
HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) {
|
|
123
|
+
if (startFullSize) {
|
|
124
|
+
HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize);
|
|
125
|
+
impl->get_deleter()(impl);
|
|
126
|
+
return hll;
|
|
127
|
+
} else {
|
|
128
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
|
|
129
|
+
CouponList<A>* cl = new (clAlloc().allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST);
|
|
130
|
+
impl->get_deleter()(impl);
|
|
131
|
+
return cl;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
template<typename A>
|
|
136
|
+
Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
|
|
137
|
+
const int lgConfigK = srcHllArr.getLgConfigK();
|
|
138
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
|
|
139
|
+
Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize());
|
|
140
|
+
hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
|
141
|
+
hll4Array->mergeHll(srcHllArr);
|
|
142
|
+
hll4Array->putHipAccum(srcHllArr.getHipAccum());
|
|
143
|
+
return hll4Array;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
template<typename A>
|
|
147
|
+
Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
|
|
148
|
+
const int lgConfigK = srcHllArr.getLgConfigK();
|
|
149
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
|
|
150
|
+
Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize());
|
|
151
|
+
hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
|
152
|
+
hll6Array->mergeHll(srcHllArr);
|
|
153
|
+
hll6Array->putHipAccum(srcHllArr.getHipAccum());
|
|
154
|
+
return hll6Array;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
template<typename A>
|
|
158
|
+
Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
|
|
159
|
+
const int lgConfigK = srcHllArr.getLgConfigK();
|
|
160
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
|
|
161
|
+
Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize());
|
|
162
|
+
hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
|
163
|
+
hll8Array->mergeHll(srcHllArr);
|
|
164
|
+
hll8Array->putHipAccum(srcHllArr.getHipAccum());
|
|
165
|
+
return hll8Array;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
#endif /* _HLLSKETCHIMPLFACTORY_HPP_ */
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLUNION_INTERNAL_HPP_
|
|
21
|
+
#define _HLLUNION_INTERNAL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "hll.hpp"
|
|
24
|
+
|
|
25
|
+
#include "HllSketchImpl.hpp"
|
|
26
|
+
#include "HllArray.hpp"
|
|
27
|
+
#include "HllUtil.hpp"
|
|
28
|
+
|
|
29
|
+
#include <stdexcept>
|
|
30
|
+
#include <string>
|
|
31
|
+
|
|
32
|
+
namespace datasketches {
|
|
33
|
+
|
|
34
|
+
template<typename A>
|
|
35
|
+
hll_union_alloc<A>::hll_union_alloc(const int lg_max_k):
|
|
36
|
+
lg_max_k(HllUtil<A>::checkLgK(lg_max_k)),
|
|
37
|
+
gadget(lg_max_k, target_hll_type::HLL_8)
|
|
38
|
+
{}
|
|
39
|
+
|
|
40
|
+
template<typename A>
|
|
41
|
+
hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type) const {
|
|
42
|
+
return hll_sketch_alloc<A>(gadget, target_type);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
template<typename A>
|
|
46
|
+
void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
|
|
47
|
+
if (sketch.is_empty()) return;
|
|
48
|
+
union_impl(sketch, lg_max_k);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
template<typename A>
|
|
52
|
+
void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
|
|
53
|
+
if (sketch.is_empty()) return;
|
|
54
|
+
if (gadget.is_empty() && sketch.get_target_type() == HLL_8 && sketch.get_lg_config_k() <= lg_max_k) {
|
|
55
|
+
if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() == lg_max_k) {
|
|
56
|
+
gadget = std::move(sketch);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
union_impl(sketch, lg_max_k);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
template<typename A>
|
|
63
|
+
void hll_union_alloc<A>::update(const std::string& datum) {
|
|
64
|
+
gadget.update(datum);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
template<typename A>
|
|
68
|
+
void hll_union_alloc<A>::update(const uint64_t datum) {
|
|
69
|
+
gadget.update(datum);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
template<typename A>
|
|
73
|
+
void hll_union_alloc<A>::update(const uint32_t datum) {
|
|
74
|
+
gadget.update(datum);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
template<typename A>
|
|
78
|
+
void hll_union_alloc<A>::update(const uint16_t datum) {
|
|
79
|
+
gadget.update(datum);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
template<typename A>
|
|
83
|
+
void hll_union_alloc<A>::update(const uint8_t datum) {
|
|
84
|
+
gadget.update(datum);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
template<typename A>
|
|
88
|
+
void hll_union_alloc<A>::update(const int64_t datum) {
|
|
89
|
+
gadget.update(datum);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
template<typename A>
|
|
93
|
+
void hll_union_alloc<A>::update(const int32_t datum) {
|
|
94
|
+
gadget.update(datum);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
template<typename A>
|
|
98
|
+
void hll_union_alloc<A>::update(const int16_t datum) {
|
|
99
|
+
gadget.update(datum);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
template<typename A>
|
|
103
|
+
void hll_union_alloc<A>::update(const int8_t datum) {
|
|
104
|
+
gadget.update(datum);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
template<typename A>
|
|
108
|
+
void hll_union_alloc<A>::update(const double datum) {
|
|
109
|
+
gadget.update(datum);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
template<typename A>
|
|
113
|
+
void hll_union_alloc<A>::update(const float datum) {
|
|
114
|
+
gadget.update(datum);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
template<typename A>
|
|
118
|
+
void hll_union_alloc<A>::update(const void* data, const size_t length_bytes) {
|
|
119
|
+
gadget.update(data, length_bytes);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
template<typename A>
|
|
123
|
+
void hll_union_alloc<A>::coupon_update(const int coupon) {
|
|
124
|
+
if (coupon == HllUtil<A>::EMPTY) { return; }
|
|
125
|
+
HllSketchImpl<A>* result = gadget.sketch_impl->coupon_update(coupon);
|
|
126
|
+
if (result != gadget.sketch_impl) {
|
|
127
|
+
if (gadget.sketch_impl != nullptr) { gadget.sketch_impl->get_deleter()(gadget.sketch_impl); }
|
|
128
|
+
gadget.sketch_impl = result;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
template<typename A>
|
|
133
|
+
double hll_union_alloc<A>::get_estimate() const {
|
|
134
|
+
return gadget.get_estimate();
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
template<typename A>
|
|
138
|
+
double hll_union_alloc<A>::get_composite_estimate() const {
|
|
139
|
+
return gadget.get_composite_estimate();
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
template<typename A>
|
|
143
|
+
double hll_union_alloc<A>::get_lower_bound(const int num_std_dev) const {
|
|
144
|
+
return gadget.get_lower_bound(num_std_dev);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
template<typename A>
|
|
148
|
+
double hll_union_alloc<A>::get_upper_bound(const int num_std_dev) const {
|
|
149
|
+
return gadget.get_upper_bound(num_std_dev);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
template<typename A>
|
|
153
|
+
int hll_union_alloc<A>::get_compact_serialization_bytes() const {
|
|
154
|
+
return gadget.get_compact_serialization_bytes();
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
template<typename A>
|
|
158
|
+
int hll_union_alloc<A>::get_updatable_serialization_bytes() const {
|
|
159
|
+
return gadget.get_updatable_serialization_bytes();
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
template<typename A>
|
|
163
|
+
int hll_union_alloc<A>::get_lg_config_k() const {
|
|
164
|
+
return gadget.get_lg_config_k();
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
template<typename A>
|
|
168
|
+
void hll_union_alloc<A>::reset() {
|
|
169
|
+
gadget.reset();
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
template<typename A>
|
|
173
|
+
bool hll_union_alloc<A>::is_compact() const {
|
|
174
|
+
return gadget.is_compact();
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
template<typename A>
|
|
178
|
+
bool hll_union_alloc<A>::is_empty() const {
|
|
179
|
+
return gadget.is_empty();
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
template<typename A>
|
|
183
|
+
bool hll_union_alloc<A>::is_out_of_order_flag() const {
|
|
184
|
+
return gadget.is_out_of_order_flag();
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
template<typename A>
|
|
188
|
+
hll_mode hll_union_alloc<A>::get_current_mode() const {
|
|
189
|
+
return gadget.get_current_mode();
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
template<typename A>
|
|
193
|
+
bool hll_union_alloc<A>::is_estimation_mode() const {
|
|
194
|
+
return gadget.is_estimation_mode();
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
template<typename A>
|
|
198
|
+
int hll_union_alloc<A>::get_serialization_version() const {
|
|
199
|
+
return HllUtil<A>::SER_VER;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
template<typename A>
|
|
203
|
+
target_hll_type hll_union_alloc<A>::get_target_type() const {
|
|
204
|
+
return target_hll_type::HLL_8;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
template<typename A>
|
|
208
|
+
int hll_union_alloc<A>::get_max_serialization_bytes(const int lg_k) {
|
|
209
|
+
return hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(lg_k, target_hll_type::HLL_8);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
template<typename A>
|
|
213
|
+
double hll_union_alloc<A>::get_rel_err(const bool upper_bound, const bool unioned,
|
|
214
|
+
const int lg_config_k, const int num_std_dev) {
|
|
215
|
+
return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
template<typename A>
|
|
219
|
+
HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* src_impl, const int tgt_lg_k) {
|
|
220
|
+
if (src_impl->getCurMode() != HLL) {
|
|
221
|
+
throw std::logic_error("Attempt to downsample non-HLL sketch");
|
|
222
|
+
}
|
|
223
|
+
const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
|
|
224
|
+
const int src_lg_k = src->getLgConfigK();
|
|
225
|
+
if (src_lg_k <= tgt_lg_k) {
|
|
226
|
+
return src->copyAs(HLL_8);
|
|
227
|
+
}
|
|
228
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
|
|
229
|
+
Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
|
|
230
|
+
tgtHllArr->mergeHll(*src);
|
|
231
|
+
//both of these are required for isomorphism
|
|
232
|
+
tgtHllArr->putHipAccum(src->getHipAccum());
|
|
233
|
+
tgtHllArr->putOutOfOrderFlag(src->isOutOfOrderFlag());
|
|
234
|
+
return tgtHllArr;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
template<typename A>
|
|
238
|
+
inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl, const int coupon) {
|
|
239
|
+
HllSketchImpl<A>* result = impl->couponUpdate(coupon);
|
|
240
|
+
if (result != impl) {
|
|
241
|
+
impl->get_deleter()(impl);
|
|
242
|
+
}
|
|
243
|
+
return result;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
template<typename A>
|
|
247
|
+
void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
|
|
248
|
+
const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
|
|
249
|
+
HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
|
|
250
|
+
if (src_impl->getCurMode() == LIST || src_impl->getCurMode() == SET) {
|
|
251
|
+
if (dst_impl->isEmpty() && src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
|
|
252
|
+
dst_impl = src_impl->copyAs(HLL_8);
|
|
253
|
+
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
|
|
254
|
+
} else {
|
|
255
|
+
const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
|
|
256
|
+
for (auto coupon: *src) {
|
|
257
|
+
dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
} else if (!dst_impl->isEmpty()) { // src is HLL
|
|
261
|
+
if (dst_impl->getCurMode() == LIST || dst_impl->getCurMode() == SET) {
|
|
262
|
+
// swap so that src is LIST or SET, tgt is HLL
|
|
263
|
+
// use lg_max_k because LIST has effective K of 2^26
|
|
264
|
+
const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
|
|
265
|
+
dst_impl = copy_or_downsample(src_impl, lg_max_k);
|
|
266
|
+
static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
|
|
267
|
+
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
|
|
268
|
+
} else { // gadget is HLL
|
|
269
|
+
if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
|
|
270
|
+
dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
|
|
271
|
+
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
|
|
272
|
+
}
|
|
273
|
+
const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
|
|
274
|
+
static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
|
|
275
|
+
dst_impl->putOutOfOrderFlag(true);
|
|
276
|
+
static_cast<Hll8Array<A>*>(dst_impl)->putHipAccum(0);
|
|
277
|
+
}
|
|
278
|
+
} else { // src is HLL, gadget is empty
|
|
279
|
+
dst_impl = copy_or_downsample(src_impl, lg_max_k);
|
|
280
|
+
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
|
|
281
|
+
}
|
|
282
|
+
gadget.sketch_impl = dst_impl; // gadget replaced
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
#endif // _HLLUNION_INTERNAL_HPP_
|