datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef _HLL6ARRAY_INTERNAL_HPP_
|
21
|
+
#define _HLL6ARRAY_INTERNAL_HPP_
|
22
|
+
|
23
|
+
#include <cstring>
|
24
|
+
|
25
|
+
#include "Hll6Array.hpp"
|
26
|
+
|
27
|
+
namespace datasketches {
|
28
|
+
|
29
|
+
template<typename A>
|
30
|
+
Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) :
|
31
|
+
HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
|
32
|
+
const int numBytes = this->hll6ArrBytes(lgConfigK);
|
33
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
|
34
|
+
this->hllByteArr = uint8Alloc().allocate(numBytes);
|
35
|
+
std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
|
36
|
+
}
|
37
|
+
|
38
|
+
template<typename A>
|
39
|
+
Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) :
|
40
|
+
HllArray<A>(that)
|
41
|
+
{
|
42
|
+
// can determine hllByteArr size in parent class, no need to allocate here
|
43
|
+
}
|
44
|
+
|
45
|
+
template<typename A>
|
46
|
+
Hll6Array<A>::~Hll6Array() {
|
47
|
+
// hllByteArr deleted in parent
|
48
|
+
}
|
49
|
+
|
50
|
+
template<typename A>
|
51
|
+
std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
|
52
|
+
return [](HllSketchImpl<A>* ptr) {
|
53
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
|
54
|
+
Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr);
|
55
|
+
hll->~Hll6Array();
|
56
|
+
hll6Alloc().deallocate(hll, 1);
|
57
|
+
};
|
58
|
+
}
|
59
|
+
|
60
|
+
template<typename A>
|
61
|
+
Hll6Array<A>* Hll6Array<A>::copy() const {
|
62
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
|
63
|
+
return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this);
|
64
|
+
}
|
65
|
+
|
66
|
+
template<typename A>
|
67
|
+
uint8_t Hll6Array<A>::getSlot(int slotNo) const {
|
68
|
+
const int startBit = slotNo * 6;
|
69
|
+
const int shift = startBit & 0x7;
|
70
|
+
const int byteIdx = startBit >> 3;
|
71
|
+
const uint16_t twoByteVal = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
|
72
|
+
return (twoByteVal >> shift) & HllUtil<A>::VAL_MASK_6;
|
73
|
+
}
|
74
|
+
|
75
|
+
template<typename A>
|
76
|
+
void Hll6Array<A>::putSlot(int slotNo, uint8_t value) {
|
77
|
+
const int startBit = slotNo * 6;
|
78
|
+
const int shift = startBit & 0x7;
|
79
|
+
const int byteIdx = startBit >> 3;
|
80
|
+
const uint16_t valShifted = (value & 0x3F) << shift;
|
81
|
+
uint16_t curMasked = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
|
82
|
+
curMasked &= (~(HllUtil<A>::VAL_MASK_6 << shift));
|
83
|
+
const uint16_t insert = curMasked | valShifted;
|
84
|
+
this->hllByteArr[byteIdx] = insert & 0xFF;
|
85
|
+
this->hllByteArr[byteIdx + 1] = (insert & 0xFF00) >> 8;
|
86
|
+
}
|
87
|
+
|
88
|
+
template<typename A>
|
89
|
+
int Hll6Array<A>::getHllByteArrBytes() const {
|
90
|
+
return this->hll6ArrBytes(this->lgConfigK);
|
91
|
+
}
|
92
|
+
|
93
|
+
template<typename A>
|
94
|
+
HllSketchImpl<A>* Hll6Array<A>::couponUpdate(const int coupon) {
|
95
|
+
internalCouponUpdate(coupon);
|
96
|
+
return this;
|
97
|
+
}
|
98
|
+
|
99
|
+
template<typename A>
|
100
|
+
void Hll6Array<A>::internalCouponUpdate(const int coupon) {
|
101
|
+
const int configKmask = (1 << this->lgConfigK) - 1;
|
102
|
+
const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
|
103
|
+
const int newVal = HllUtil<A>::getValue(coupon);
|
104
|
+
|
105
|
+
const int curVal = getSlot(slotNo);
|
106
|
+
if (newVal > curVal) {
|
107
|
+
putSlot(slotNo, newVal);
|
108
|
+
this->hipAndKxQIncrementalUpdate(curVal, newVal);
|
109
|
+
if (curVal == 0) {
|
110
|
+
this->numAtCurMin--; // interpret numAtCurMin as num zeros
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
template<typename A>
|
116
|
+
void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
|
117
|
+
for (auto coupon: src) {
|
118
|
+
internalCouponUpdate(coupon);
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
}
|
123
|
+
|
124
|
+
#endif // _HLL6ARRAY_INTERNAL_HPP_
|
@@ -0,0 +1,55 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef _HLL6ARRAY_HPP_
|
21
|
+
#define _HLL6ARRAY_HPP_
|
22
|
+
|
23
|
+
#include "HllArray.hpp"
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
template<typename A>
|
28
|
+
class Hll6Iterator;
|
29
|
+
|
30
|
+
template<typename A>
|
31
|
+
class Hll6Array final : public HllArray<A> {
|
32
|
+
public:
|
33
|
+
explicit Hll6Array(int lgConfigK, bool startFullSize);
|
34
|
+
explicit Hll6Array(const Hll6Array<A>& that);
|
35
|
+
|
36
|
+
virtual ~Hll6Array();
|
37
|
+
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
38
|
+
|
39
|
+
virtual Hll6Array* copy() const;
|
40
|
+
|
41
|
+
inline uint8_t getSlot(int slotNo) const;
|
42
|
+
inline void putSlot(int slotNo, uint8_t value);
|
43
|
+
|
44
|
+
virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
|
45
|
+
void mergeHll(const HllArray<A>& src);
|
46
|
+
|
47
|
+
virtual int getHllByteArrBytes() const;
|
48
|
+
|
49
|
+
private:
|
50
|
+
void internalCouponUpdate(int coupon);
|
51
|
+
};
|
52
|
+
|
53
|
+
}
|
54
|
+
|
55
|
+
#endif /* _HLL6ARRAY_HPP_ */
|
@@ -0,0 +1,158 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef _HLL8ARRAY_INTERNAL_HPP_
|
21
|
+
#define _HLL8ARRAY_INTERNAL_HPP_
|
22
|
+
|
23
|
+
#include "Hll8Array.hpp"
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
template<typename A>
|
28
|
+
Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) :
|
29
|
+
HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
|
30
|
+
const int numBytes = this->hll8ArrBytes(lgConfigK);
|
31
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
|
32
|
+
this->hllByteArr = uint8Alloc().allocate(numBytes);
|
33
|
+
std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
|
34
|
+
}
|
35
|
+
|
36
|
+
template<typename A>
|
37
|
+
Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) :
|
38
|
+
HllArray<A>(that)
|
39
|
+
{
|
40
|
+
// can determine hllByteArr size in parent class, no need to allocate here
|
41
|
+
}
|
42
|
+
|
43
|
+
template<typename A>
|
44
|
+
Hll8Array<A>::~Hll8Array() {
|
45
|
+
// hllByteArr deleted in parent
|
46
|
+
}
|
47
|
+
|
48
|
+
template<typename A>
|
49
|
+
std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
|
50
|
+
return [](HllSketchImpl<A>* ptr) {
|
51
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
|
52
|
+
Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr);
|
53
|
+
hll->~Hll8Array();
|
54
|
+
hll8Alloc().deallocate(hll, 1);
|
55
|
+
};
|
56
|
+
}
|
57
|
+
|
58
|
+
template<typename A>
|
59
|
+
Hll8Array<A>* Hll8Array<A>::copy() const {
|
60
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
|
61
|
+
return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this);
|
62
|
+
}
|
63
|
+
|
64
|
+
template<typename A>
|
65
|
+
uint8_t Hll8Array<A>::getSlot(const int slotNo) const {
|
66
|
+
return this->hllByteArr[slotNo];
|
67
|
+
}
|
68
|
+
|
69
|
+
template<typename A>
|
70
|
+
void Hll8Array<A>::putSlot(const int slotNo, uint8_t value) {
|
71
|
+
this->hllByteArr[slotNo] = value;
|
72
|
+
}
|
73
|
+
|
74
|
+
template<typename A>
|
75
|
+
int Hll8Array<A>::getHllByteArrBytes() const {
|
76
|
+
return this->hll8ArrBytes(this->lgConfigK);
|
77
|
+
}
|
78
|
+
|
79
|
+
template<typename A>
|
80
|
+
HllSketchImpl<A>* Hll8Array<A>::couponUpdate(int coupon) {
|
81
|
+
internalCouponUpdate(coupon);
|
82
|
+
return this;
|
83
|
+
}
|
84
|
+
|
85
|
+
template<typename A>
|
86
|
+
void Hll8Array<A>::internalCouponUpdate(int coupon) {
|
87
|
+
const int configKmask = (1 << this->lgConfigK) - 1;
|
88
|
+
const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
|
89
|
+
const int newVal = HllUtil<A>::getValue(coupon);
|
90
|
+
|
91
|
+
const int curVal = getSlot(slotNo);
|
92
|
+
if (newVal > curVal) {
|
93
|
+
putSlot(slotNo, newVal);
|
94
|
+
this->hipAndKxQIncrementalUpdate(curVal, newVal);
|
95
|
+
if (curVal == 0) {
|
96
|
+
this->numAtCurMin--; // interpret numAtCurMin as num zeros
|
97
|
+
}
|
98
|
+
}
|
99
|
+
}
|
100
|
+
|
101
|
+
template<typename A>
|
102
|
+
void Hll8Array<A>::mergeList(const CouponList<A>& src) {
|
103
|
+
for (auto coupon: src) {
|
104
|
+
internalCouponUpdate(coupon);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
template<typename A>
|
109
|
+
void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
|
110
|
+
// at this point src_k >= dst_k
|
111
|
+
const int src_k = 1 << src.getLgConfigK();
|
112
|
+
const int dst_mask = (1 << this->getLgConfigK()) - 1;
|
113
|
+
// duplication below is to avoid a virtual method call in a loop
|
114
|
+
if (src.getTgtHllType() == target_hll_type::HLL_8) {
|
115
|
+
for (int i = 0; i < src_k; i++) {
|
116
|
+
const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
|
117
|
+
const int j = i & dst_mask;
|
118
|
+
const uint8_t old_v = this->hllByteArr[j];
|
119
|
+
if (new_v > old_v) {
|
120
|
+
this->hllByteArr[j] = new_v;
|
121
|
+
this->hipAndKxQIncrementalUpdate(old_v, new_v);
|
122
|
+
if (old_v == 0) {
|
123
|
+
this->numAtCurMin--;
|
124
|
+
}
|
125
|
+
}
|
126
|
+
}
|
127
|
+
} else if (src.getTgtHllType() == target_hll_type::HLL_6) {
|
128
|
+
for (int i = 0; i < src_k; i++) {
|
129
|
+
const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
|
130
|
+
const int j = i & dst_mask;
|
131
|
+
const uint8_t old_v = this->hllByteArr[j];
|
132
|
+
if (new_v > old_v) {
|
133
|
+
this->hllByteArr[j] = new_v;
|
134
|
+
this->hipAndKxQIncrementalUpdate(old_v, new_v);
|
135
|
+
if (old_v == 0) {
|
136
|
+
this->numAtCurMin--;
|
137
|
+
}
|
138
|
+
}
|
139
|
+
}
|
140
|
+
} else { // HLL_4
|
141
|
+
for (int i = 0; i < src_k; i++) {
|
142
|
+
const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
|
143
|
+
const int j = i & dst_mask;
|
144
|
+
const uint8_t old_v = this->hllByteArr[j];
|
145
|
+
if (new_v > old_v) {
|
146
|
+
this->hllByteArr[j] = new_v;
|
147
|
+
this->hipAndKxQIncrementalUpdate(old_v, new_v);
|
148
|
+
if (old_v == 0) {
|
149
|
+
this->numAtCurMin--;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
}
|
157
|
+
|
158
|
+
#endif // _HLL8ARRAY_INTERNAL_HPP_
|
@@ -0,0 +1,56 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef _HLL8ARRAY_HPP_
|
21
|
+
#define _HLL8ARRAY_HPP_
|
22
|
+
|
23
|
+
#include "HllArray.hpp"
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
template<typename A>
|
28
|
+
class Hll8Iterator;
|
29
|
+
|
30
|
+
template<typename A>
|
31
|
+
class Hll8Array final : public HllArray<A> {
|
32
|
+
public:
|
33
|
+
explicit Hll8Array(int lgConfigK, bool startFullSize);
|
34
|
+
explicit Hll8Array(const Hll8Array& that);
|
35
|
+
|
36
|
+
virtual ~Hll8Array();
|
37
|
+
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
38
|
+
|
39
|
+
virtual Hll8Array<A>* copy() const;
|
40
|
+
|
41
|
+
inline uint8_t getSlot(int slotNo) const;
|
42
|
+
inline void putSlot(int slotNo, uint8_t value);
|
43
|
+
|
44
|
+
virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
|
45
|
+
void mergeList(const CouponList<A>& src);
|
46
|
+
void mergeHll(const HllArray<A>& src);
|
47
|
+
|
48
|
+
virtual int getHllByteArrBytes() const;
|
49
|
+
|
50
|
+
private:
|
51
|
+
inline void internalCouponUpdate(int coupon);
|
52
|
+
};
|
53
|
+
|
54
|
+
}
|
55
|
+
|
56
|
+
#endif /* _HLL8ARRAY_HPP_ */
|
@@ -0,0 +1,706 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef _HLLARRAY_INTERNAL_HPP_
|
21
|
+
#define _HLLARRAY_INTERNAL_HPP_
|
22
|
+
|
23
|
+
#include "HllArray.hpp"
|
24
|
+
#include "HllUtil.hpp"
|
25
|
+
#include "HarmonicNumbers.hpp"
|
26
|
+
#include "CubicInterpolation.hpp"
|
27
|
+
#include "CompositeInterpolationXTable.hpp"
|
28
|
+
#include "CouponList.hpp"
|
29
|
+
#include "inv_pow2_table.hpp"
|
30
|
+
#include <cstring>
|
31
|
+
#include <cmath>
|
32
|
+
#include <stdexcept>
|
33
|
+
#include <string>
|
34
|
+
|
35
|
+
namespace datasketches {
|
36
|
+
|
37
|
+
template<typename A>
|
38
|
+
HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
|
39
|
+
: HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
|
40
|
+
hipAccum = 0.0;
|
41
|
+
kxq0 = 1 << lgConfigK;
|
42
|
+
kxq1 = 0.0;
|
43
|
+
curMin = 0;
|
44
|
+
numAtCurMin = 1 << lgConfigK;
|
45
|
+
oooFlag = false;
|
46
|
+
hllByteArr = nullptr; // allocated in derived class
|
47
|
+
}
|
48
|
+
|
49
|
+
template<typename A>
|
50
|
+
HllArray<A>::HllArray(const HllArray<A>& that):
|
51
|
+
HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
|
52
|
+
hipAccum(that.hipAccum),
|
53
|
+
kxq0(that.kxq0),
|
54
|
+
kxq1(that.kxq1),
|
55
|
+
hllByteArr(nullptr),
|
56
|
+
curMin(that.curMin),
|
57
|
+
numAtCurMin(that.numAtCurMin),
|
58
|
+
oooFlag(that.oooFlag)
|
59
|
+
{
|
60
|
+
const int arrayLen = that.getHllByteArrBytes();
|
61
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
|
62
|
+
hllByteArr = uint8Alloc().allocate(arrayLen);
|
63
|
+
std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
|
64
|
+
}
|
65
|
+
|
66
|
+
template<typename A>
|
67
|
+
HllArray<A>::~HllArray() {
|
68
|
+
// need to determine number of bytes to deallocate
|
69
|
+
int hllArrBytes = 0;
|
70
|
+
if (this->tgtHllType == target_hll_type::HLL_4) {
|
71
|
+
hllArrBytes = hll4ArrBytes(this->lgConfigK);
|
72
|
+
} else if (this->tgtHllType == target_hll_type::HLL_6) {
|
73
|
+
hllArrBytes = hll6ArrBytes(this->lgConfigK);
|
74
|
+
} else { // tgtHllType == HLL_8
|
75
|
+
hllArrBytes = hll8ArrBytes(this->lgConfigK);
|
76
|
+
}
|
77
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
|
78
|
+
uint8Alloc().deallocate(hllByteArr, hllArrBytes);
|
79
|
+
}
|
80
|
+
|
81
|
+
template<typename A>
|
82
|
+
HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
|
83
|
+
if (tgtHllType == this->getTgtHllType()) {
|
84
|
+
return static_cast<HllArray*>(copy());
|
85
|
+
}
|
86
|
+
if (tgtHllType == target_hll_type::HLL_4) {
|
87
|
+
return HllSketchImplFactory<A>::convertToHll4(*this);
|
88
|
+
} else if (tgtHllType == target_hll_type::HLL_6) {
|
89
|
+
return HllSketchImplFactory<A>::convertToHll6(*this);
|
90
|
+
} else { // tgtHllType == HLL_8
|
91
|
+
return HllSketchImplFactory<A>::convertToHll8(*this);
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
template<typename A>
|
96
|
+
HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
|
97
|
+
if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
|
98
|
+
throw std::out_of_range("Input data length insufficient to hold HLL array");
|
99
|
+
}
|
100
|
+
|
101
|
+
const uint8_t* data = static_cast<const uint8_t*>(bytes);
|
102
|
+
if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
|
103
|
+
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
104
|
+
}
|
105
|
+
if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
|
106
|
+
throw std::invalid_argument("Wrong ser ver in input stream");
|
107
|
+
}
|
108
|
+
if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
|
109
|
+
throw std::invalid_argument("Input array is not an HLL sketch");
|
110
|
+
}
|
111
|
+
|
112
|
+
const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
|
113
|
+
if (mode != HLL) {
|
114
|
+
throw std::invalid_argument("Calling HLL array construtor with non-HLL mode data");
|
115
|
+
}
|
116
|
+
|
117
|
+
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
|
118
|
+
const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
119
|
+
const bool comapctFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
|
120
|
+
const bool startFullSizeFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
|
121
|
+
|
122
|
+
const int lgK = (int) data[HllUtil<A>::LG_K_BYTE];
|
123
|
+
const int curMin = (int) data[HllUtil<A>::HLL_CUR_MIN_BYTE];
|
124
|
+
|
125
|
+
const int arrayBytes = hllArrBytes(tgtHllType, lgK);
|
126
|
+
if (len < static_cast<size_t>(HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes)) {
|
127
|
+
throw std::out_of_range("Input array too small to hold sketch image");
|
128
|
+
}
|
129
|
+
|
130
|
+
double hip, kxq0, kxq1;
|
131
|
+
std::memcpy(&hip, data + HllUtil<A>::HIP_ACCUM_DOUBLE, sizeof(double));
|
132
|
+
std::memcpy(&kxq0, data + HllUtil<A>::KXQ0_DOUBLE, sizeof(double));
|
133
|
+
std::memcpy(&kxq1, data + HllUtil<A>::KXQ1_DOUBLE, sizeof(double));
|
134
|
+
|
135
|
+
int numAtCurMin, auxCount;
|
136
|
+
std::memcpy(&numAtCurMin, data + HllUtil<A>::CUR_MIN_COUNT_INT, sizeof(int));
|
137
|
+
std::memcpy(&auxCount, data + HllUtil<A>::AUX_COUNT_INT, sizeof(int));
|
138
|
+
|
139
|
+
AuxHashMap<A>* auxHashMap = nullptr;
|
140
|
+
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
141
|
+
aux_hash_map_ptr aux_ptr;
|
142
|
+
if (auxCount > 0) { // necessarily TgtHllType == HLL_4
|
143
|
+
int auxLgIntArrSize = (int) data[4];
|
144
|
+
const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
|
145
|
+
const uint8_t* auxDataStart = data + offset;
|
146
|
+
auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
|
147
|
+
aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
|
148
|
+
}
|
149
|
+
|
150
|
+
HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
|
151
|
+
sketch->putCurMin(curMin);
|
152
|
+
sketch->putOutOfOrderFlag(oooFlag);
|
153
|
+
if (!oooFlag) sketch->putHipAccum(hip);
|
154
|
+
sketch->putKxQ0(kxq0);
|
155
|
+
sketch->putKxQ1(kxq1);
|
156
|
+
sketch->putNumAtCurMin(numAtCurMin);
|
157
|
+
|
158
|
+
std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
|
159
|
+
|
160
|
+
if (auxHashMap != nullptr)
|
161
|
+
((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
|
162
|
+
|
163
|
+
aux_ptr.release();
|
164
|
+
return sketch;
|
165
|
+
}
|
166
|
+
|
167
|
+
template<typename A>
|
168
|
+
HllArray<A>* HllArray<A>::newHll(std::istream& is) {
|
169
|
+
uint8_t listHeader[8];
|
170
|
+
is.read((char*)listHeader, 8 * sizeof(uint8_t));
|
171
|
+
|
172
|
+
if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
|
173
|
+
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
174
|
+
}
|
175
|
+
if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
|
176
|
+
throw std::invalid_argument("Wrong ser ver in input stream");
|
177
|
+
}
|
178
|
+
if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
|
179
|
+
throw std::invalid_argument("Input stream is not an HLL sketch");
|
180
|
+
}
|
181
|
+
|
182
|
+
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
|
183
|
+
if (mode != HLL) {
|
184
|
+
throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
|
185
|
+
}
|
186
|
+
|
187
|
+
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
|
188
|
+
const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
189
|
+
const bool comapctFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
|
190
|
+
const bool startFullSizeFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
|
191
|
+
|
192
|
+
const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
|
193
|
+
const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
|
194
|
+
|
195
|
+
HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
|
196
|
+
typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
|
197
|
+
hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
|
198
|
+
sketch->putCurMin(curMin);
|
199
|
+
sketch->putOutOfOrderFlag(oooFlag);
|
200
|
+
|
201
|
+
double hip, kxq0, kxq1;
|
202
|
+
is.read((char*)&hip, sizeof(hip));
|
203
|
+
is.read((char*)&kxq0, sizeof(kxq0));
|
204
|
+
is.read((char*)&kxq1, sizeof(kxq1));
|
205
|
+
if (!oooFlag) sketch->putHipAccum(hip);
|
206
|
+
sketch->putKxQ0(kxq0);
|
207
|
+
sketch->putKxQ1(kxq1);
|
208
|
+
|
209
|
+
int numAtCurMin, auxCount;
|
210
|
+
is.read((char*)&numAtCurMin, sizeof(numAtCurMin));
|
211
|
+
is.read((char*)&auxCount, sizeof(auxCount));
|
212
|
+
sketch->putNumAtCurMin(numAtCurMin);
|
213
|
+
|
214
|
+
is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
|
215
|
+
|
216
|
+
if (auxCount > 0) { // necessarily TgtHllType == HLL_4
|
217
|
+
int auxLgIntArrSize = listHeader[4];
|
218
|
+
AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
|
219
|
+
((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
|
220
|
+
}
|
221
|
+
|
222
|
+
if (!is.good())
|
223
|
+
throw std::runtime_error("error reading from std::istream");
|
224
|
+
|
225
|
+
return sketch_ptr.release();
|
226
|
+
}
|
227
|
+
|
228
|
+
template<typename A>
|
229
|
+
vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
|
230
|
+
const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
|
231
|
+
vector_u8<A> byteArr(sketchSizeBytes);
|
232
|
+
uint8_t* bytes = byteArr.data() + header_size_bytes;
|
233
|
+
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
234
|
+
|
235
|
+
bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
|
236
|
+
bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
|
237
|
+
bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
|
238
|
+
bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
|
239
|
+
bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
|
240
|
+
bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
|
241
|
+
bytes[HllUtil<A>::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin);
|
242
|
+
bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
|
243
|
+
|
244
|
+
std::memcpy(bytes + HllUtil<A>::HIP_ACCUM_DOUBLE, &hipAccum, sizeof(double));
|
245
|
+
std::memcpy(bytes + HllUtil<A>::KXQ0_DOUBLE, &kxq0, sizeof(double));
|
246
|
+
std::memcpy(bytes + HllUtil<A>::KXQ1_DOUBLE, &kxq1, sizeof(double));
|
247
|
+
std::memcpy(bytes + HllUtil<A>::CUR_MIN_COUNT_INT, &numAtCurMin, sizeof(int));
|
248
|
+
const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
|
249
|
+
std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
|
250
|
+
|
251
|
+
const int hllByteArrBytes = getHllByteArrBytes();
|
252
|
+
std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
|
253
|
+
|
254
|
+
// aux map if HLL_4
|
255
|
+
if (this->tgtHllType == HLL_4) {
|
256
|
+
bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
|
257
|
+
if (auxHashMap != nullptr) {
|
258
|
+
if (compact) {
|
259
|
+
for (uint32_t coupon: *auxHashMap) {
|
260
|
+
std::memcpy(bytes, &coupon, sizeof(coupon));
|
261
|
+
bytes += sizeof(coupon);
|
262
|
+
}
|
263
|
+
} else {
|
264
|
+
std::memcpy(bytes, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
|
265
|
+
}
|
266
|
+
} else if (!compact) {
|
267
|
+
// if updatable, we write even if currently unused so the binary can be wrapped
|
268
|
+
int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
|
269
|
+
std::fill_n(bytes, auxBytes, 0);
|
270
|
+
}
|
271
|
+
}
|
272
|
+
|
273
|
+
return byteArr;
|
274
|
+
}
|
275
|
+
|
276
|
+
template<typename A>
|
277
|
+
void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
|
278
|
+
// header
|
279
|
+
const uint8_t preInts(getPreInts());
|
280
|
+
os.write((char*)&preInts, sizeof(preInts));
|
281
|
+
const uint8_t serialVersion(HllUtil<A>::SER_VER);
|
282
|
+
os.write((char*)&serialVersion, sizeof(serialVersion));
|
283
|
+
const uint8_t familyId(HllUtil<A>::FAMILY_ID);
|
284
|
+
os.write((char*)&familyId, sizeof(familyId));
|
285
|
+
const uint8_t lgKByte((uint8_t) this->lgConfigK);
|
286
|
+
os.write((char*)&lgKByte, sizeof(lgKByte));
|
287
|
+
|
288
|
+
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
289
|
+
uint8_t lgArrByte(0);
|
290
|
+
if (auxHashMap != nullptr) {
|
291
|
+
lgArrByte = auxHashMap->getLgAuxArrInts();
|
292
|
+
}
|
293
|
+
os.write((char*)&lgArrByte, sizeof(lgArrByte));
|
294
|
+
|
295
|
+
const uint8_t flagsByte(this->makeFlagsByte(compact));
|
296
|
+
os.write((char*)&flagsByte, sizeof(flagsByte));
|
297
|
+
const uint8_t curMinByte((uint8_t) curMin);
|
298
|
+
os.write((char*)&curMinByte, sizeof(curMinByte));
|
299
|
+
const uint8_t modeByte(this->makeModeByte());
|
300
|
+
os.write((char*)&modeByte, sizeof(modeByte));
|
301
|
+
|
302
|
+
// estimator data
|
303
|
+
os.write((char*)&hipAccum, sizeof(hipAccum));
|
304
|
+
os.write((char*)&kxq0, sizeof(kxq0));
|
305
|
+
os.write((char*)&kxq1, sizeof(kxq1));
|
306
|
+
|
307
|
+
// array data
|
308
|
+
os.write((char*)&numAtCurMin, sizeof(numAtCurMin));
|
309
|
+
|
310
|
+
const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
|
311
|
+
os.write((char*)&auxCount, sizeof(auxCount));
|
312
|
+
os.write((char*)hllByteArr, getHllByteArrBytes());
|
313
|
+
|
314
|
+
// aux map if HLL_4
|
315
|
+
if (this->tgtHllType == HLL_4) {
|
316
|
+
if (auxHashMap != nullptr) {
|
317
|
+
if (compact) {
|
318
|
+
for (uint32_t coupon: *auxHashMap) {
|
319
|
+
os.write((char*)&coupon, sizeof(coupon));
|
320
|
+
}
|
321
|
+
} else {
|
322
|
+
os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
|
323
|
+
}
|
324
|
+
} else if (!compact) {
|
325
|
+
// if updatable, we write even if currently unused so the binary can be wrapped
|
326
|
+
int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
|
327
|
+
std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
|
328
|
+
}
|
329
|
+
}
|
330
|
+
}
|
331
|
+
|
332
|
+
template<typename A>
|
333
|
+
double HllArray<A>::getEstimate() const {
|
334
|
+
if (oooFlag) {
|
335
|
+
return getCompositeEstimate();
|
336
|
+
}
|
337
|
+
return getHipAccum();
|
338
|
+
}
|
339
|
+
|
340
|
+
// HLL UPPER AND LOWER BOUNDS
|
341
|
+
|
342
|
+
/*
|
343
|
+
* The upper and lower bounds are not symmetric and thus are treated slightly differently.
|
344
|
+
* For the lower bound, when the unique count is <= k, LB >= numNonZeros, where
|
345
|
+
* numNonZeros = k - numAtCurMin AND curMin == 0.
|
346
|
+
*
|
347
|
+
* For HLL6 and HLL8, curMin is always 0 and numAtCurMin is initialized to k and is decremented
|
348
|
+
* down for each valid update until it reaches 0, where it stays. Thus, for these two
|
349
|
+
* isomorphs, when numAtCurMin = 0, means the true curMin is > 0 and the unique count must be
|
350
|
+
* greater than k.
|
351
|
+
*
|
352
|
+
* HLL4 always maintains both curMin and numAtCurMin dynamically. Nonetheless, the rules for
|
353
|
+
* the very small values <= k where curMin = 0 still apply.
|
354
|
+
*/
|
355
|
+
template<typename A>
|
356
|
+
double HllArray<A>::getLowerBound(const int numStdDev) const {
|
357
|
+
HllUtil<A>::checkNumStdDev(numStdDev);
|
358
|
+
const int configK = 1 << this->lgConfigK;
|
359
|
+
const double numNonZeros = ((curMin == 0) ? (configK - numAtCurMin) : configK);
|
360
|
+
|
361
|
+
double estimate;
|
362
|
+
double rseFactor;
|
363
|
+
if (oooFlag) {
|
364
|
+
estimate = getCompositeEstimate();
|
365
|
+
rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
|
366
|
+
} else {
|
367
|
+
estimate = hipAccum;
|
368
|
+
rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
|
369
|
+
}
|
370
|
+
|
371
|
+
double relErr;
|
372
|
+
if (this->lgConfigK > 12) {
|
373
|
+
relErr = (numStdDev * rseFactor) / sqrt(configK);
|
374
|
+
} else {
|
375
|
+
relErr = HllUtil<A>::getRelErr(false, oooFlag, this->lgConfigK, numStdDev);
|
376
|
+
}
|
377
|
+
return fmax(estimate / (1.0 + relErr), numNonZeros);
|
378
|
+
}
|
379
|
+
|
380
|
+
template<typename A>
|
381
|
+
double HllArray<A>::getUpperBound(const int numStdDev) const {
|
382
|
+
HllUtil<A>::checkNumStdDev(numStdDev);
|
383
|
+
const int configK = 1 << this->lgConfigK;
|
384
|
+
|
385
|
+
double estimate;
|
386
|
+
double rseFactor;
|
387
|
+
if (oooFlag) {
|
388
|
+
estimate = getCompositeEstimate();
|
389
|
+
rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
|
390
|
+
} else {
|
391
|
+
estimate = hipAccum;
|
392
|
+
rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
|
393
|
+
}
|
394
|
+
|
395
|
+
double relErr;
|
396
|
+
if (this->lgConfigK > 12) {
|
397
|
+
relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
|
398
|
+
} else {
|
399
|
+
relErr = HllUtil<A>::getRelErr(true, oooFlag, this->lgConfigK, numStdDev);
|
400
|
+
}
|
401
|
+
return estimate / (1.0 + relErr);
|
402
|
+
}
|
403
|
+
|
404
|
+
/**
|
405
|
+
* This is the (non-HIP) estimator.
|
406
|
+
* It is called "composite" because multiple estimators are pasted together.
|
407
|
+
* @param absHllArr an instance of the AbstractHllArray class.
|
408
|
+
* @return the composite estimate
|
409
|
+
*/
|
410
|
+
// Original C: again-two-registers.c hhb_get_composite_estimate L1489
|
411
|
+
template<typename A>
|
412
|
+
double HllArray<A>::getCompositeEstimate() const {
|
413
|
+
const double rawEst = getHllRawEstimate(this->lgConfigK, kxq0 + kxq1);
|
414
|
+
|
415
|
+
const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK);
|
416
|
+
const int xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
|
417
|
+
const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK);
|
418
|
+
|
419
|
+
if (rawEst < xArr[0]) {
|
420
|
+
return 0;
|
421
|
+
}
|
422
|
+
|
423
|
+
const int xArrLenM1 = xArrLen - 1;
|
424
|
+
|
425
|
+
if (rawEst > xArr[xArrLenM1]) {
|
426
|
+
double finalY = yStride * xArrLenM1;
|
427
|
+
double factor = finalY / xArr[xArrLenM1];
|
428
|
+
return rawEst * factor;
|
429
|
+
}
|
430
|
+
|
431
|
+
double adjEst = CubicInterpolation<A>::usingXArrAndYStride(xArr, xArrLen, yStride, rawEst);
|
432
|
+
|
433
|
+
// We need to completely avoid the linear_counting estimator if it might have a crazy value.
|
434
|
+
// Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
|
435
|
+
|
436
|
+
if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
|
437
|
+
|
438
|
+
const double linEst =
|
439
|
+
getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
|
440
|
+
|
441
|
+
// Bias is created when the value of an estimator is compared with a threshold to decide whether
|
442
|
+
// to use that estimator or a different one.
|
443
|
+
// We conjecture that less bias is created when the average of the two estimators
|
444
|
+
// is compared with the threshold. Empirical measurements support this conjecture.
|
445
|
+
|
446
|
+
const double avgEst = (adjEst + linEst) / 2.0;
|
447
|
+
|
448
|
+
// The following constants comes from empirical measurements of the crossover point
|
449
|
+
// between the average error of the linear estimator and the adjusted hll estimator
|
450
|
+
double crossOver = 0.64;
|
451
|
+
if (this->lgConfigK == 4) { crossOver = 0.718; }
|
452
|
+
else if (this->lgConfigK == 5) { crossOver = 0.672; }
|
453
|
+
|
454
|
+
return (avgEst > (crossOver * (1 << this->lgConfigK))) ? adjEst : linEst;
|
455
|
+
}
|
456
|
+
|
457
|
+
template<typename A>
|
458
|
+
double HllArray<A>::getKxQ0() const {
|
459
|
+
return kxq0;
|
460
|
+
}
|
461
|
+
|
462
|
+
template<typename A>
|
463
|
+
double HllArray<A>::getKxQ1() const {
|
464
|
+
return kxq1;
|
465
|
+
}
|
466
|
+
|
467
|
+
template<typename A>
|
468
|
+
double HllArray<A>::getHipAccum() const {
|
469
|
+
return hipAccum;
|
470
|
+
}
|
471
|
+
|
472
|
+
template<typename A>
|
473
|
+
int HllArray<A>::getCurMin() const {
|
474
|
+
return curMin;
|
475
|
+
}
|
476
|
+
|
477
|
+
template<typename A>
|
478
|
+
int HllArray<A>::getNumAtCurMin() const {
|
479
|
+
return numAtCurMin;
|
480
|
+
}
|
481
|
+
|
482
|
+
template<typename A>
|
483
|
+
void HllArray<A>::putKxQ0(const double kxq0) {
|
484
|
+
this->kxq0 = kxq0;
|
485
|
+
}
|
486
|
+
|
487
|
+
template<typename A>
|
488
|
+
void HllArray<A>::putKxQ1(const double kxq1) {
|
489
|
+
this->kxq1 = kxq1;
|
490
|
+
}
|
491
|
+
|
492
|
+
template<typename A>
|
493
|
+
void HllArray<A>::putHipAccum(const double hipAccum) {
|
494
|
+
this->hipAccum = hipAccum;
|
495
|
+
}
|
496
|
+
|
497
|
+
template<typename A>
|
498
|
+
void HllArray<A>::putCurMin(const int curMin) {
|
499
|
+
this->curMin = curMin;
|
500
|
+
}
|
501
|
+
|
502
|
+
template<typename A>
|
503
|
+
void HllArray<A>::putNumAtCurMin(const int numAtCurMin) {
|
504
|
+
this->numAtCurMin = numAtCurMin;
|
505
|
+
}
|
506
|
+
|
507
|
+
template<typename A>
|
508
|
+
void HllArray<A>::decNumAtCurMin() {
|
509
|
+
--numAtCurMin;
|
510
|
+
}
|
511
|
+
|
512
|
+
template<typename A>
|
513
|
+
void HllArray<A>::addToHipAccum(const double delta) {
|
514
|
+
hipAccum += delta;
|
515
|
+
}
|
516
|
+
|
517
|
+
template<typename A>
|
518
|
+
bool HllArray<A>::isCompact() const {
|
519
|
+
return false;
|
520
|
+
}
|
521
|
+
|
522
|
+
template<typename A>
|
523
|
+
bool HllArray<A>::isEmpty() const {
|
524
|
+
const int configK = 1 << this->lgConfigK;
|
525
|
+
return (getCurMin() == 0) && (getNumAtCurMin() == configK);
|
526
|
+
}
|
527
|
+
|
528
|
+
template<typename A>
|
529
|
+
void HllArray<A>::putOutOfOrderFlag(bool flag) {
|
530
|
+
oooFlag = flag;
|
531
|
+
}
|
532
|
+
|
533
|
+
template<typename A>
|
534
|
+
bool HllArray<A>::isOutOfOrderFlag() const {
|
535
|
+
return oooFlag;
|
536
|
+
}
|
537
|
+
|
538
|
+
template<typename A>
|
539
|
+
int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
|
540
|
+
switch (tgtHllType) {
|
541
|
+
case HLL_4:
|
542
|
+
return hll4ArrBytes(lgConfigK);
|
543
|
+
case HLL_6:
|
544
|
+
return hll6ArrBytes(lgConfigK);
|
545
|
+
case HLL_8:
|
546
|
+
return hll8ArrBytes(lgConfigK);
|
547
|
+
default:
|
548
|
+
throw std::invalid_argument("Invalid target HLL type");
|
549
|
+
}
|
550
|
+
}
|
551
|
+
|
552
|
+
template<typename A>
|
553
|
+
int HllArray<A>::hll4ArrBytes(const int lgConfigK) {
|
554
|
+
return 1 << (lgConfigK - 1);
|
555
|
+
}
|
556
|
+
|
557
|
+
template<typename A>
|
558
|
+
int HllArray<A>::hll6ArrBytes(const int lgConfigK) {
|
559
|
+
const int numSlots = 1 << lgConfigK;
|
560
|
+
return ((numSlots * 3) >> 2) + 1;
|
561
|
+
}
|
562
|
+
|
563
|
+
template<typename A>
|
564
|
+
int HllArray<A>::hll8ArrBytes(const int lgConfigK) {
|
565
|
+
return 1 << lgConfigK;
|
566
|
+
}
|
567
|
+
|
568
|
+
template<typename A>
|
569
|
+
int HllArray<A>::getMemDataStart() const {
|
570
|
+
return HllUtil<A>::HLL_BYTE_ARR_START;
|
571
|
+
}
|
572
|
+
|
573
|
+
template<typename A>
|
574
|
+
int HllArray<A>::getUpdatableSerializationBytes() const {
|
575
|
+
return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes();
|
576
|
+
}
|
577
|
+
|
578
|
+
template<typename A>
|
579
|
+
int HllArray<A>::getCompactSerializationBytes() const {
|
580
|
+
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
581
|
+
const int auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
|
582
|
+
return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
|
583
|
+
}
|
584
|
+
|
585
|
+
template<typename A>
|
586
|
+
int HllArray<A>::getPreInts() const {
|
587
|
+
return HllUtil<A>::HLL_PREINTS;
|
588
|
+
}
|
589
|
+
|
590
|
+
template<typename A>
|
591
|
+
AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
|
592
|
+
return nullptr;
|
593
|
+
}
|
594
|
+
|
595
|
+
template<typename A>
|
596
|
+
void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
|
597
|
+
const int configK = 1 << this->getLgConfigK();
|
598
|
+
// update hip BEFORE updating kxq
|
599
|
+
if (!oooFlag) hipAccum += configK / (kxq0 + kxq1);
|
600
|
+
// update kxq0 and kxq1; subtract first, then add
|
601
|
+
if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
|
602
|
+
else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
|
603
|
+
if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
|
604
|
+
else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
|
605
|
+
}
|
606
|
+
|
607
|
+
/**
|
608
|
+
* Estimator when N is small, roughly less than k log(k).
|
609
|
+
* Refer to Wikipedia: Coupon Collector Problem
|
610
|
+
* @return the very low range estimate
|
611
|
+
*/
|
612
|
+
//In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
|
613
|
+
template<typename A>
|
614
|
+
double HllArray<A>::getHllBitMapEstimate(const int lgConfigK, const int curMin, const int numAtCurMin) const {
|
615
|
+
const int configK = 1 << lgConfigK;
|
616
|
+
const int numUnhitBuckets = ((curMin == 0) ? numAtCurMin : 0);
|
617
|
+
|
618
|
+
//This will eventually go away.
|
619
|
+
if (numUnhitBuckets == 0) {
|
620
|
+
return configK * log(configK / 0.5);
|
621
|
+
}
|
622
|
+
|
623
|
+
const int numHitBuckets = configK - numUnhitBuckets;
|
624
|
+
return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
|
625
|
+
}
|
626
|
+
|
627
|
+
//In C: again-two-registers.c hhb_get_raw_estimate L1167
|
628
|
+
template<typename A>
|
629
|
+
double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) const {
|
630
|
+
const int configK = 1 << lgConfigK;
|
631
|
+
double correctionFactor;
|
632
|
+
if (lgConfigK == 4) { correctionFactor = 0.673; }
|
633
|
+
else if (lgConfigK == 5) { correctionFactor = 0.697; }
|
634
|
+
else if (lgConfigK == 6) { correctionFactor = 0.709; }
|
635
|
+
else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
|
636
|
+
const double hyperEst = (correctionFactor * configK * configK) / kxqSum;
|
637
|
+
return hyperEst;
|
638
|
+
}
|
639
|
+
|
640
|
+
template<typename A>
|
641
|
+
typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
|
642
|
+
return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
|
643
|
+
}
|
644
|
+
|
645
|
+
template<typename A>
|
646
|
+
typename HllArray<A>::const_iterator HllArray<A>::end() const {
|
647
|
+
return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
|
648
|
+
}
|
649
|
+
|
650
|
+
template<typename A>
|
651
|
+
HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
|
652
|
+
array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
|
653
|
+
{
|
654
|
+
while (this->index < array_size) {
|
655
|
+
value = get_value(array, this->index, hll_type, exceptions, offset);
|
656
|
+
if (all || value != HllUtil<A>::EMPTY) break;
|
657
|
+
this->index++;
|
658
|
+
}
|
659
|
+
}
|
660
|
+
|
661
|
+
template<typename A>
|
662
|
+
typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
|
663
|
+
while (++index < array_size) {
|
664
|
+
value = get_value(array, index, hll_type, exceptions, offset);
|
665
|
+
if (all || value != HllUtil<A>::EMPTY) break;
|
666
|
+
}
|
667
|
+
return *this;
|
668
|
+
}
|
669
|
+
|
670
|
+
template<typename A>
|
671
|
+
bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
|
672
|
+
return index != other.index;
|
673
|
+
}
|
674
|
+
|
675
|
+
template<typename A>
|
676
|
+
uint32_t HllArray<A>::const_iterator::operator*() const {
|
677
|
+
return HllUtil<A>::pair(index, value);
|
678
|
+
}
|
679
|
+
|
680
|
+
template<typename A>
|
681
|
+
uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
|
682
|
+
if (hll_type == target_hll_type::HLL_4) {
|
683
|
+
uint8_t value = array[index >> 1];
|
684
|
+
if ((index & 1) > 0) { // odd
|
685
|
+
value >>= 4;
|
686
|
+
} else {
|
687
|
+
value &= HllUtil<A>::loNibbleMask;
|
688
|
+
}
|
689
|
+
if (value == HllUtil<A>::AUX_TOKEN) { // exception
|
690
|
+
return exceptions->mustFindValueFor(index);
|
691
|
+
}
|
692
|
+
return value + offset;
|
693
|
+
} else if (hll_type == target_hll_type::HLL_6) {
|
694
|
+
const int start_bit = index * 6;
|
695
|
+
const int shift = start_bit & 0x7;
|
696
|
+
const int byte_idx = start_bit >> 3;
|
697
|
+
const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
|
698
|
+
return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
|
699
|
+
}
|
700
|
+
// HLL_8
|
701
|
+
return array[index];
|
702
|
+
}
|
703
|
+
|
704
|
+
}
|
705
|
+
|
706
|
+
#endif // _HLLARRAY_INTERNAL_HPP_
|