datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLARRAY_HPP_
|
|
21
|
+
#define _HLLARRAY_HPP_
|
|
22
|
+
|
|
23
|
+
#include <cstddef>
#include <iterator>

#include "HllSketchImpl.hpp"
#include "HllUtil.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
template<typename A>
|
|
29
|
+
class AuxHashMap;
|
|
30
|
+
|
|
31
|
+
template<typename A = std::allocator<char>>
|
|
32
|
+
class HllArray : public HllSketchImpl<A> {
|
|
33
|
+
public:
|
|
34
|
+
explicit HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize);
|
|
35
|
+
explicit HllArray(const HllArray<A>& that);
|
|
36
|
+
|
|
37
|
+
static HllArray* newHll(const void* bytes, size_t len);
|
|
38
|
+
static HllArray* newHll(std::istream& is);
|
|
39
|
+
|
|
40
|
+
virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
|
|
41
|
+
virtual void serialize(std::ostream& os, bool compact) const;
|
|
42
|
+
|
|
43
|
+
virtual ~HllArray();
|
|
44
|
+
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
|
|
45
|
+
|
|
46
|
+
virtual HllArray* copy() const = 0;
|
|
47
|
+
virtual HllArray* copyAs(target_hll_type tgtHllType) const;
|
|
48
|
+
|
|
49
|
+
virtual HllSketchImpl<A>* couponUpdate(int coupon) = 0;
|
|
50
|
+
|
|
51
|
+
virtual double getEstimate() const;
|
|
52
|
+
virtual double getCompositeEstimate() const;
|
|
53
|
+
virtual double getLowerBound(int numStdDev) const;
|
|
54
|
+
virtual double getUpperBound(int numStdDev) const;
|
|
55
|
+
|
|
56
|
+
inline void addToHipAccum(double delta);
|
|
57
|
+
|
|
58
|
+
inline void decNumAtCurMin();
|
|
59
|
+
|
|
60
|
+
inline int getCurMin() const;
|
|
61
|
+
inline int getNumAtCurMin() const;
|
|
62
|
+
inline double getHipAccum() const;
|
|
63
|
+
|
|
64
|
+
virtual int getHllByteArrBytes() const = 0;
|
|
65
|
+
|
|
66
|
+
virtual int getUpdatableSerializationBytes() const;
|
|
67
|
+
virtual int getCompactSerializationBytes() const;
|
|
68
|
+
|
|
69
|
+
virtual bool isOutOfOrderFlag() const;
|
|
70
|
+
virtual bool isEmpty() const;
|
|
71
|
+
virtual bool isCompact() const;
|
|
72
|
+
|
|
73
|
+
virtual void putOutOfOrderFlag(bool flag);
|
|
74
|
+
|
|
75
|
+
inline double getKxQ0() const;
|
|
76
|
+
inline double getKxQ1() const;
|
|
77
|
+
|
|
78
|
+
virtual int getMemDataStart() const;
|
|
79
|
+
virtual int getPreInts() const;
|
|
80
|
+
|
|
81
|
+
void putCurMin(int curMin);
|
|
82
|
+
void putHipAccum(double hipAccum);
|
|
83
|
+
inline void putKxQ0(double kxq0);
|
|
84
|
+
inline void putKxQ1(double kxq1);
|
|
85
|
+
void putNumAtCurMin(int numAtCurMin);
|
|
86
|
+
|
|
87
|
+
static int hllArrBytes(target_hll_type tgtHllType, int lgConfigK);
|
|
88
|
+
static int hll4ArrBytes(int lgConfigK);
|
|
89
|
+
static int hll6ArrBytes(int lgConfigK);
|
|
90
|
+
static int hll8ArrBytes(int lgConfigK);
|
|
91
|
+
|
|
92
|
+
virtual AuxHashMap<A>* getAuxHashMap() const;
|
|
93
|
+
|
|
94
|
+
class const_iterator;
|
|
95
|
+
virtual const_iterator begin(bool all = false) const;
|
|
96
|
+
virtual const_iterator end() const;
|
|
97
|
+
|
|
98
|
+
protected:
|
|
99
|
+
void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
|
|
100
|
+
double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const;
|
|
101
|
+
double getHllRawEstimate(int lgConfigK, double kxqSum) const;
|
|
102
|
+
|
|
103
|
+
double hipAccum;
|
|
104
|
+
double kxq0;
|
|
105
|
+
double kxq1;
|
|
106
|
+
uint8_t* hllByteArr; //init by sub-classes
|
|
107
|
+
int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array
|
|
108
|
+
int numAtCurMin; //interpreted as num zeros when curMin == 0
|
|
109
|
+
bool oooFlag; //Out-Of-Order Flag
|
|
110
|
+
|
|
111
|
+
friend class HllSketchImplFactory<A>;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
template<typename A>
|
|
115
|
+
class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
|
116
|
+
public:
|
|
117
|
+
const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
|
|
118
|
+
//const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
|
|
119
|
+
const_iterator& operator++();
|
|
120
|
+
bool operator!=(const const_iterator& other) const;
|
|
121
|
+
uint32_t operator*() const;
|
|
122
|
+
private:
|
|
123
|
+
const uint8_t* array;
|
|
124
|
+
size_t array_size;
|
|
125
|
+
size_t index;
|
|
126
|
+
target_hll_type hll_type;
|
|
127
|
+
const AuxHashMap<A>* exceptions;
|
|
128
|
+
uint8_t offset;
|
|
129
|
+
bool all;
|
|
130
|
+
uint8_t value; // cached value to avoid computing in operator++ and in operator*()
|
|
131
|
+
static inline uint8_t get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
#endif /* _HLLARRAY_HPP_ */
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef _HLLSKETCH_INTERNAL_HPP_
|
|
21
|
+
#define _HLLSKETCH_INTERNAL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "hll.hpp"
|
|
24
|
+
#include "HllUtil.hpp"
|
|
25
|
+
#include "HllSketchImplFactory.hpp"
|
|
26
|
+
#include "CouponList.hpp"
|
|
27
|
+
#include "HllArray.hpp"
|
|
28
|
+
#include "common_defs.hpp"
|
|
29
|
+
|
|
30
|
+
#include <cstdio>
|
|
31
|
+
#include <cstdlib>
|
|
32
|
+
#include <string>
|
|
33
|
+
#include <iostream>
|
|
34
|
+
#include <sstream>
|
|
35
|
+
#include <iomanip>
|
|
36
|
+
|
|
37
|
+
namespace datasketches {
|
|
38
|
+
|
|
39
|
+
// Overlays a double with a 64-bit integer so that the bit pattern of a
// floating-point value can be set or hashed directly.
union longDoubleUnion {
  int64_t longBytes;   // the 64-bit integer view
  double doubleBytes;  // the floating-point view
};
|
|
43
|
+
|
|
44
|
+
template<typename A>
|
|
45
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) {
|
|
46
|
+
HllUtil<A>::checkLgK(lg_config_k);
|
|
47
|
+
if (start_full_size) {
|
|
48
|
+
sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
|
|
49
|
+
} else {
|
|
50
|
+
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
|
|
51
|
+
sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
template<typename A>
|
|
56
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
|
|
57
|
+
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is);
|
|
58
|
+
hll_sketch_alloc<A> sketch(impl);
|
|
59
|
+
return sketch;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
template<typename A>
|
|
63
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
|
|
64
|
+
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len);
|
|
65
|
+
hll_sketch_alloc<A> sketch(impl);
|
|
66
|
+
return sketch;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
template<typename A>
|
|
70
|
+
hll_sketch_alloc<A>::~hll_sketch_alloc() {
|
|
71
|
+
if (sketch_impl != nullptr) {
|
|
72
|
+
sketch_impl->get_deleter()(sketch_impl);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
template<typename A>
|
|
77
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that) :
|
|
78
|
+
sketch_impl(that.sketch_impl->copy())
|
|
79
|
+
{}
|
|
80
|
+
|
|
81
|
+
template<typename A>
|
|
82
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type) :
|
|
83
|
+
sketch_impl(that.sketch_impl->copyAs(tgt_type))
|
|
84
|
+
{}
|
|
85
|
+
|
|
86
|
+
template<typename A>
|
|
87
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept :
|
|
88
|
+
sketch_impl(nullptr)
|
|
89
|
+
{
|
|
90
|
+
std::swap(sketch_impl, that.sketch_impl);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
template<typename A>
|
|
94
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
|
|
95
|
+
sketch_impl(that)
|
|
96
|
+
{}
|
|
97
|
+
|
|
98
|
+
template<typename A>
|
|
99
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
|
|
100
|
+
sketch_impl->get_deleter()(sketch_impl);
|
|
101
|
+
sketch_impl = other.sketch_impl->copy();
|
|
102
|
+
return *this;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
template<typename A>
|
|
106
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
|
|
107
|
+
std::swap(sketch_impl, other.sketch_impl);
|
|
108
|
+
return *this;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
template<typename A>
|
|
112
|
+
void hll_sketch_alloc<A>::reset() {
|
|
113
|
+
// TODO: need to allow starting from a full-sized sketch
|
|
114
|
+
// (either here or in other implementation)
|
|
115
|
+
sketch_impl = sketch_impl->reset();
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
template<typename A>
|
|
119
|
+
void hll_sketch_alloc<A>::update(const std::string& datum) {
|
|
120
|
+
if (datum.empty()) { return; }
|
|
121
|
+
HashState hashResult;
|
|
122
|
+
HllUtil<A>::hash(datum.c_str(), datum.length(), DEFAULT_SEED, hashResult);
|
|
123
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
template<typename A>
|
|
127
|
+
void hll_sketch_alloc<A>::update(const uint64_t datum) {
|
|
128
|
+
// no sign extension with 64 bits so no need to cast to signed value
|
|
129
|
+
HashState hashResult;
|
|
130
|
+
HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
|
|
131
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
template<typename A>
|
|
135
|
+
void hll_sketch_alloc<A>::update(const uint32_t datum) {
|
|
136
|
+
update(static_cast<int32_t>(datum));
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
template<typename A>
|
|
140
|
+
void hll_sketch_alloc<A>::update(const uint16_t datum) {
|
|
141
|
+
update(static_cast<int16_t>(datum));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
template<typename A>
|
|
145
|
+
void hll_sketch_alloc<A>::update(const uint8_t datum) {
|
|
146
|
+
update(static_cast<int8_t>(datum));
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
template<typename A>
|
|
150
|
+
void hll_sketch_alloc<A>::update(const int64_t datum) {
|
|
151
|
+
HashState hashResult;
|
|
152
|
+
HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
153
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
template<typename A>
|
|
157
|
+
void hll_sketch_alloc<A>::update(const int32_t datum) {
|
|
158
|
+
int64_t val = static_cast<int64_t>(datum);
|
|
159
|
+
HashState hashResult;
|
|
160
|
+
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
161
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
template<typename A>
|
|
165
|
+
void hll_sketch_alloc<A>::update(const int16_t datum) {
|
|
166
|
+
int64_t val = static_cast<int64_t>(datum);
|
|
167
|
+
HashState hashResult;
|
|
168
|
+
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
169
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
template<typename A>
|
|
173
|
+
void hll_sketch_alloc<A>::update(const int8_t datum) {
|
|
174
|
+
int64_t val = static_cast<int64_t>(datum);
|
|
175
|
+
HashState hashResult;
|
|
176
|
+
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
177
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
template<typename A>
|
|
181
|
+
void hll_sketch_alloc<A>::update(const double datum) {
|
|
182
|
+
longDoubleUnion d;
|
|
183
|
+
d.doubleBytes = static_cast<double>(datum);
|
|
184
|
+
if (datum == 0.0) {
|
|
185
|
+
d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
|
|
186
|
+
} else if (std::isnan(d.doubleBytes)) {
|
|
187
|
+
d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
|
|
188
|
+
}
|
|
189
|
+
HashState hashResult;
|
|
190
|
+
HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
|
|
191
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
template<typename A>
|
|
195
|
+
void hll_sketch_alloc<A>::update(const float datum) {
|
|
196
|
+
longDoubleUnion d;
|
|
197
|
+
d.doubleBytes = static_cast<double>(datum);
|
|
198
|
+
if (datum == 0.0) {
|
|
199
|
+
d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
|
|
200
|
+
} else if (std::isnan(d.doubleBytes)) {
|
|
201
|
+
d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
|
|
202
|
+
}
|
|
203
|
+
HashState hashResult;
|
|
204
|
+
HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
|
|
205
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
template<typename A>
|
|
209
|
+
void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
|
|
210
|
+
if (data == nullptr) { return; }
|
|
211
|
+
HashState hashResult;
|
|
212
|
+
HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
|
|
213
|
+
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
template<typename A>
|
|
217
|
+
void hll_sketch_alloc<A>::coupon_update(int coupon) {
|
|
218
|
+
if (coupon == HllUtil<A>::EMPTY) { return; }
|
|
219
|
+
HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
|
|
220
|
+
if (result != this->sketch_impl) {
|
|
221
|
+
this->sketch_impl->get_deleter()(this->sketch_impl);
|
|
222
|
+
this->sketch_impl = result;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
template<typename A>
|
|
227
|
+
void hll_sketch_alloc<A>::serialize_compact(std::ostream& os) const {
|
|
228
|
+
return sketch_impl->serialize(os, true);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
template<typename A>
|
|
232
|
+
void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
|
|
233
|
+
return sketch_impl->serialize(os, false);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
template<typename A>
|
|
237
|
+
vector_u8<A> hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const {
|
|
238
|
+
return sketch_impl->serialize(true, header_size_bytes);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
template<typename A>
|
|
242
|
+
vector_u8<A> hll_sketch_alloc<A>::serialize_updatable() const {
|
|
243
|
+
return sketch_impl->serialize(false, 0);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
template<typename A>
|
|
247
|
+
string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
248
|
+
const bool detail,
|
|
249
|
+
const bool aux_detail,
|
|
250
|
+
const bool all) const {
|
|
251
|
+
std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
|
|
252
|
+
if (summary) {
|
|
253
|
+
os << "### HLL sketch summary:" << std::endl
|
|
254
|
+
<< " Log Config K : " << get_lg_config_k() << std::endl
|
|
255
|
+
<< " Hll Target : " << type_as_string() << std::endl
|
|
256
|
+
<< " Current Mode : " << mode_as_string() << std::endl
|
|
257
|
+
<< " LB : " << get_lower_bound(1) << std::endl
|
|
258
|
+
<< " Estimate : " << get_estimate() << std::endl
|
|
259
|
+
<< " UB : " << get_upper_bound(1) << std::endl
|
|
260
|
+
<< " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
|
|
261
|
+
if (get_current_mode() == HLL) {
|
|
262
|
+
HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
|
|
263
|
+
os << " CurMin : " << hllArray->getCurMin() << std::endl
|
|
264
|
+
<< " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
|
|
265
|
+
<< " HipAccum : " << hllArray->getHipAccum() << std::endl
|
|
266
|
+
<< " KxQ0 : " << hllArray->getKxQ0() << std::endl
|
|
267
|
+
<< " KxQ1 : " << hllArray->getKxQ1() << std::endl;
|
|
268
|
+
if (get_target_type() == HLL_4) {
|
|
269
|
+
const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
|
|
270
|
+
os << " Aux table? : " << (hll4_ptr->getAuxHashMap() != nullptr ? "true" : "false") << std::endl;
|
|
271
|
+
}
|
|
272
|
+
} else {
|
|
273
|
+
os << " Coupon count : "
|
|
274
|
+
<< std::to_string(((CouponList<A>*) sketch_impl)->getCouponCount()) << std::endl;
|
|
275
|
+
}
|
|
276
|
+
os << "### End HLL sketch summary" << std::endl;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
if (detail) {
|
|
280
|
+
os << "### HLL sketch data detail:" << std::endl;
|
|
281
|
+
if (get_current_mode() == HLL) {
|
|
282
|
+
const HllArray<A>* hll_ptr = static_cast<const HllArray<A>*>(sketch_impl);
|
|
283
|
+
os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl;
|
|
284
|
+
auto it = hll_ptr->begin(all);
|
|
285
|
+
while (it != hll_ptr->end()) {
|
|
286
|
+
os << std::setw(10) << HllUtil<A>::getLow26(*it);
|
|
287
|
+
os << std::setw(6) << HllUtil<A>::getValue(*it);
|
|
288
|
+
os << std::endl;
|
|
289
|
+
++it;
|
|
290
|
+
}
|
|
291
|
+
} else {
|
|
292
|
+
const CouponList<A>* list_ptr = static_cast<const CouponList<A>*>(sketch_impl);
|
|
293
|
+
os << std::left;
|
|
294
|
+
os << std::setw(10) << "Index";
|
|
295
|
+
os << std::setw(10) << "Key";
|
|
296
|
+
os << std::setw(10) << "Slot";
|
|
297
|
+
os << std::setw(6) << "Value";
|
|
298
|
+
os << std::endl;
|
|
299
|
+
auto it = list_ptr->begin(all);
|
|
300
|
+
int i = 0;
|
|
301
|
+
int mask = (1 << get_lg_config_k()) - 1;
|
|
302
|
+
while (it != list_ptr->end()) {
|
|
303
|
+
os << std::setw(10) << i;
|
|
304
|
+
os << std::setw(10) << HllUtil<A>::getLow26(*it);
|
|
305
|
+
os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
|
|
306
|
+
os << std::setw(6) << HllUtil<A>::getValue(*it);
|
|
307
|
+
os << std::endl;
|
|
308
|
+
++it;
|
|
309
|
+
++i;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
os << "### End HLL sketch data detail" << std::endl;
|
|
313
|
+
}
|
|
314
|
+
if (aux_detail) {
|
|
315
|
+
if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) {
|
|
316
|
+
const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
|
|
317
|
+
const AuxHashMap<A>* aux_ptr = hll4_ptr->getAuxHashMap();
|
|
318
|
+
if (aux_ptr != nullptr) {
|
|
319
|
+
os << "### HLL sketch aux detail:" << std::endl;
|
|
320
|
+
os << std::left;
|
|
321
|
+
os << std::setw(10) << "Index";
|
|
322
|
+
os << std::setw(10) << "Key";
|
|
323
|
+
os << std::setw(10) << "Slot";
|
|
324
|
+
os << std::setw(6) << "Value";
|
|
325
|
+
os << std::endl;
|
|
326
|
+
auto it = aux_ptr->begin(all);
|
|
327
|
+
int i = 0;
|
|
328
|
+
int mask = (1 << get_lg_config_k()) - 1;
|
|
329
|
+
while (it != aux_ptr->end()) {
|
|
330
|
+
os << std::setw(10) << i;
|
|
331
|
+
os << std::setw(10) << HllUtil<A>::getLow26(*it);
|
|
332
|
+
os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
|
|
333
|
+
os << std::setw(6) << HllUtil<A>::getValue(*it);
|
|
334
|
+
os << std::endl;
|
|
335
|
+
++it;
|
|
336
|
+
++i;
|
|
337
|
+
}
|
|
338
|
+
os << "### End HLL sketch aux detail" << std::endl;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
return os.str();
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
template<typename A>
|
|
347
|
+
double hll_sketch_alloc<A>::get_estimate() const {
|
|
348
|
+
return sketch_impl->getEstimate();
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
template<typename A>
|
|
352
|
+
double hll_sketch_alloc<A>::get_composite_estimate() const {
|
|
353
|
+
return sketch_impl->getCompositeEstimate();
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
template<typename A>
|
|
357
|
+
double hll_sketch_alloc<A>::get_lower_bound(int numStdDev) const {
|
|
358
|
+
return sketch_impl->getLowerBound(numStdDev);
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
template<typename A>
|
|
362
|
+
double hll_sketch_alloc<A>::get_upper_bound(int numStdDev) const {
|
|
363
|
+
return sketch_impl->getUpperBound(numStdDev);
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
template<typename A>
|
|
367
|
+
hll_mode hll_sketch_alloc<A>::get_current_mode() const {
|
|
368
|
+
return sketch_impl->getCurMode();
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
template<typename A>
|
|
372
|
+
int hll_sketch_alloc<A>::get_lg_config_k() const {
|
|
373
|
+
return sketch_impl->getLgConfigK();
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
template<typename A>
|
|
377
|
+
target_hll_type hll_sketch_alloc<A>::get_target_type() const {
|
|
378
|
+
return sketch_impl->getTgtHllType();
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
template<typename A>
|
|
382
|
+
bool hll_sketch_alloc<A>::is_out_of_order_flag() const {
|
|
383
|
+
return sketch_impl->isOutOfOrderFlag();
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
template<typename A>
|
|
387
|
+
bool hll_sketch_alloc<A>::is_estimation_mode() const {
|
|
388
|
+
return true;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
template<typename A>
|
|
392
|
+
int hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
|
|
393
|
+
return sketch_impl->getUpdatableSerializationBytes();
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
template<typename A>
|
|
397
|
+
int hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
|
|
398
|
+
return sketch_impl->getCompactSerializationBytes();
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
template<typename A>
|
|
402
|
+
bool hll_sketch_alloc<A>::is_compact() const {
|
|
403
|
+
return sketch_impl->isCompact();
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
template<typename A>
|
|
407
|
+
bool hll_sketch_alloc<A>::is_empty() const {
|
|
408
|
+
return sketch_impl->isEmpty();
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
template<typename A>
|
|
412
|
+
std::string hll_sketch_alloc<A>::type_as_string() const {
|
|
413
|
+
switch (sketch_impl->getTgtHllType()) {
|
|
414
|
+
case target_hll_type::HLL_4:
|
|
415
|
+
return std::string("HLL_4");
|
|
416
|
+
case target_hll_type::HLL_6:
|
|
417
|
+
return std::string("HLL_6");
|
|
418
|
+
case target_hll_type::HLL_8:
|
|
419
|
+
return std::string("HLL_8");
|
|
420
|
+
default:
|
|
421
|
+
throw std::runtime_error("Sketch state error: Invalid target_hll_type");
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
template<typename A>
|
|
426
|
+
std::string hll_sketch_alloc<A>::mode_as_string() const {
|
|
427
|
+
switch (sketch_impl->getCurMode()) {
|
|
428
|
+
case LIST:
|
|
429
|
+
return std::string("LIST");
|
|
430
|
+
case SET:
|
|
431
|
+
return std::string("SET");
|
|
432
|
+
case HLL:
|
|
433
|
+
return std::string("HLL");
|
|
434
|
+
default:
|
|
435
|
+
throw std::runtime_error("Sketch state error: Invalid hll_mode");
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
template<typename A>
|
|
440
|
+
int hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(const int lg_config_k,
|
|
441
|
+
const target_hll_type tgtHllType) {
|
|
442
|
+
int arrBytes;
|
|
443
|
+
if (tgtHllType == target_hll_type::HLL_4) {
|
|
444
|
+
const int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[lg_config_k];
|
|
445
|
+
arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
|
|
446
|
+
} else if (tgtHllType == target_hll_type::HLL_6) {
|
|
447
|
+
arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
|
|
448
|
+
} else { //HLL_8
|
|
449
|
+
arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
|
|
450
|
+
}
|
|
451
|
+
return HllUtil<A>::HLL_BYTE_ARR_START + arrBytes;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
template<typename A>
|
|
455
|
+
double hll_sketch_alloc<A>::get_rel_err(const bool upperBound, const bool unioned,
|
|
456
|
+
const int lg_config_k, const int numStdDev) {
|
|
457
|
+
return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
#endif // _HLLSKETCH_INTERNAL_HPP_
|