datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef THETA_UNION_BASE_HPP_
|
|
21
|
+
#define THETA_UNION_BASE_HPP_
|
|
22
|
+
|
|
23
|
+
#include "theta_update_sketch_base.hpp"
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
template<
|
|
28
|
+
typename Entry,
|
|
29
|
+
typename ExtractKey,
|
|
30
|
+
typename Policy,
|
|
31
|
+
typename Sketch,
|
|
32
|
+
typename CompactSketch,
|
|
33
|
+
typename Allocator = std::allocator<Entry>
|
|
34
|
+
>
|
|
35
|
+
class theta_union_base {
|
|
36
|
+
public:
|
|
37
|
+
using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
|
|
38
|
+
using resize_factor = typename hash_table::resize_factor;
|
|
39
|
+
using comparator = compare_by_key<ExtractKey>;
|
|
40
|
+
|
|
41
|
+
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
42
|
+
|
|
43
|
+
template<typename FwdSketch>
|
|
44
|
+
void update(FwdSketch&& sketch);
|
|
45
|
+
|
|
46
|
+
CompactSketch get_result(bool ordered = true) const;
|
|
47
|
+
|
|
48
|
+
const Policy& get_policy() const;
|
|
49
|
+
|
|
50
|
+
private:
|
|
51
|
+
Policy policy_;
|
|
52
|
+
hash_table table_;
|
|
53
|
+
uint64_t union_theta_;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
} /* namespace datasketches */
|
|
57
|
+
|
|
58
|
+
#include "theta_union_base_impl.hpp"
|
|
59
|
+
|
|
60
|
+
#endif
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <algorithm>
|
|
21
|
+
|
|
22
|
+
#include "conditional_forward.hpp"
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
27
|
+
theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
28
|
+
uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
29
|
+
policy_(policy),
|
|
30
|
+
table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
|
|
31
|
+
union_theta_(table_.theta_)
|
|
32
|
+
{}
|
|
33
|
+
|
|
34
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
35
|
+
template<typename SS>
|
|
36
|
+
void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
37
|
+
if (sketch.is_empty()) return;
|
|
38
|
+
if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
|
|
39
|
+
table_.is_empty_ = false;
|
|
40
|
+
if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
|
|
41
|
+
for (auto& entry: sketch) {
|
|
42
|
+
const uint64_t hash = EK()(entry);
|
|
43
|
+
if (hash < union_theta_) {
|
|
44
|
+
auto result = table_.find(hash);
|
|
45
|
+
if (!result.second) {
|
|
46
|
+
table_.insert(result.first, conditional_forward<SS>(entry));
|
|
47
|
+
} else {
|
|
48
|
+
policy_(*result.first, conditional_forward<SS>(entry));
|
|
49
|
+
}
|
|
50
|
+
} else {
|
|
51
|
+
if (sketch.is_ordered()) break; // early stop
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
58
|
+
CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
|
|
59
|
+
std::vector<EN, A> entries(table_.allocator_);
|
|
60
|
+
if (table_.is_empty_) return CS(true, true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
|
|
61
|
+
entries.reserve(table_.num_entries_);
|
|
62
|
+
uint64_t theta = std::min(union_theta_, table_.theta_);
|
|
63
|
+
const uint32_t nominal_num = 1 << table_.lg_nom_size_;
|
|
64
|
+
if (union_theta_ >= theta && table_.num_entries_ <= nominal_num) {
|
|
65
|
+
std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
|
|
66
|
+
} else {
|
|
67
|
+
std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
|
|
68
|
+
if (entries.size() > nominal_num) {
|
|
69
|
+
std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
|
|
70
|
+
theta = EK()(entries[nominal_num]);
|
|
71
|
+
entries.erase(entries.begin() + nominal_num, entries.end());
|
|
72
|
+
entries.shrink_to_fit();
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
if (ordered) std::sort(entries.begin(), entries.end(), comparator());
|
|
76
|
+
return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
80
|
+
const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
|
|
81
|
+
return policy_;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef THETA_UNION_EXPERIMENTAL_HPP_
|
|
21
|
+
#define THETA_UNION_EXPERIMENTAL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "serde.hpp"
|
|
24
|
+
#include "tuple_sketch.hpp"
|
|
25
|
+
#include "theta_union_base.hpp"
|
|
26
|
+
#include "theta_sketch_experimental.hpp"
|
|
27
|
+
|
|
28
|
+
namespace datasketches {
|
|
29
|
+
|
|
30
|
+
// experimental theta union derived from the same base as tuple union
|
|
31
|
+
|
|
32
|
+
template<typename Allocator = std::allocator<uint64_t>>
|
|
33
|
+
class theta_union_experimental {
|
|
34
|
+
public:
|
|
35
|
+
using Entry = uint64_t;
|
|
36
|
+
using ExtractKey = trivial_extract_key;
|
|
37
|
+
using Sketch = theta_sketch_experimental<Allocator>;
|
|
38
|
+
using CompactSketch = compact_theta_sketch_experimental<Allocator>;
|
|
39
|
+
using resize_factor = theta_constants::resize_factor;
|
|
40
|
+
|
|
41
|
+
struct pass_through_policy {
|
|
42
|
+
uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
|
43
|
+
unused(incoming_entry);
|
|
44
|
+
return internal_entry;
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
|
|
48
|
+
|
|
49
|
+
// No constructor here. Use builder instead.
|
|
50
|
+
class builder;
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* This method is to update the union with a given sketch
|
|
54
|
+
* @param sketch to update the union with
|
|
55
|
+
*/
|
|
56
|
+
void update(const Sketch& sketch);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* This method produces a copy of the current state of the union as a compact sketch.
|
|
60
|
+
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
61
|
+
* @return the result of the union
|
|
62
|
+
*/
|
|
63
|
+
CompactSketch get_result(bool ordered = true) const;
|
|
64
|
+
|
|
65
|
+
private:
|
|
66
|
+
State state_;
|
|
67
|
+
|
|
68
|
+
// for builder
|
|
69
|
+
theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
template<typename A>
|
|
73
|
+
class theta_union_experimental<A>::builder: public theta_base_builder<builder, A> {
|
|
74
|
+
public:
|
|
75
|
+
builder(const A& allocator = A());
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* This is to create an instance of the union with predefined parameters.
|
|
79
|
+
* @return an instance of the union
|
|
80
|
+
*/
|
|
81
|
+
theta_union_experimental<A> build() const;
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
} /* namespace datasketches */
|
|
85
|
+
|
|
86
|
+
#include "theta_union_experimental_impl.hpp"
|
|
87
|
+
|
|
88
|
+
#endif
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
namespace datasketches {
|
|
21
|
+
|
|
22
|
+
template<typename A>
|
|
23
|
+
theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
|
|
24
|
+
state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
|
|
25
|
+
{}
|
|
26
|
+
|
|
27
|
+
template<typename A>
|
|
28
|
+
void theta_union_experimental<A>::update(const Sketch& sketch) {
|
|
29
|
+
state_.update(sketch);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
template<typename A>
|
|
33
|
+
auto theta_union_experimental<A>::get_result(bool ordered) const -> CompactSketch {
|
|
34
|
+
return state_.get_result(ordered);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
template<typename A>
|
|
38
|
+
theta_union_experimental<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
|
|
39
|
+
|
|
40
|
+
template<typename A>
|
|
41
|
+
auto theta_union_experimental<A>::builder::build() const -> theta_union_experimental {
|
|
42
|
+
return theta_union_experimental(
|
|
43
|
+
this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
|
|
44
|
+
this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef THETA_UPDATE_SKETCH_BASE_HPP_
|
|
21
|
+
#define THETA_UPDATE_SKETCH_BASE_HPP_
|
|
22
|
+
|
|
23
|
+
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <vector>

#include "common_defs.hpp"
#include "MurmurHash3.h"
#include "theta_comparators.hpp"
#include "theta_constants.hpp"
|
|
31
|
+
|
|
32
|
+
namespace datasketches {
|
|
33
|
+
|
|
34
|
+
template<
|
|
35
|
+
typename Entry,
|
|
36
|
+
typename ExtractKey,
|
|
37
|
+
typename Allocator = std::allocator<Entry>
|
|
38
|
+
>
|
|
39
|
+
struct theta_update_sketch_base {
|
|
40
|
+
using resize_factor = theta_constants::resize_factor;
|
|
41
|
+
using comparator = compare_by_key<ExtractKey>;
|
|
42
|
+
|
|
43
|
+
theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
|
|
44
|
+
uint64_t seed, const Allocator& allocator, bool is_empty = true);
|
|
45
|
+
theta_update_sketch_base(const theta_update_sketch_base& other);
|
|
46
|
+
theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
|
|
47
|
+
~theta_update_sketch_base();
|
|
48
|
+
theta_update_sketch_base& operator=(const theta_update_sketch_base& other);
|
|
49
|
+
theta_update_sketch_base& operator=(theta_update_sketch_base&& other);
|
|
50
|
+
|
|
51
|
+
using iterator = Entry*;
|
|
52
|
+
|
|
53
|
+
inline uint64_t hash_and_screen(const void* data, size_t length);
|
|
54
|
+
|
|
55
|
+
inline std::pair<iterator, bool> find(uint64_t key) const;
|
|
56
|
+
|
|
57
|
+
template<typename FwdEntry>
|
|
58
|
+
inline void insert(iterator it, FwdEntry&& entry);
|
|
59
|
+
|
|
60
|
+
iterator begin() const;
|
|
61
|
+
iterator end() const;
|
|
62
|
+
|
|
63
|
+
// resize threshold = 0.5 tuned for speed
|
|
64
|
+
static constexpr double RESIZE_THRESHOLD = 0.5;
|
|
65
|
+
// hash table rebuild threshold = 15/16
|
|
66
|
+
static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
|
|
67
|
+
|
|
68
|
+
static constexpr uint8_t STRIDE_HASH_BITS = 7;
|
|
69
|
+
static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
|
|
70
|
+
|
|
71
|
+
Allocator allocator_;
|
|
72
|
+
bool is_empty_;
|
|
73
|
+
uint8_t lg_cur_size_;
|
|
74
|
+
uint8_t lg_nom_size_;
|
|
75
|
+
resize_factor rf_;
|
|
76
|
+
uint32_t num_entries_;
|
|
77
|
+
uint64_t theta_;
|
|
78
|
+
uint64_t seed_;
|
|
79
|
+
Entry* entries_;
|
|
80
|
+
|
|
81
|
+
void resize();
|
|
82
|
+
void rebuild();
|
|
83
|
+
void trim();
|
|
84
|
+
|
|
85
|
+
static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
|
|
86
|
+
static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
|
|
87
|
+
static void consolidate_non_empty(Entry* entries, size_t size, size_t num);
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// builder
|
|
91
|
+
|
|
92
|
+
template<typename Derived, typename Allocator>
|
|
93
|
+
class theta_base_builder {
|
|
94
|
+
public:
|
|
95
|
+
using resize_factor = theta_constants::resize_factor;
|
|
96
|
+
static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
|
|
97
|
+
static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
|
|
98
|
+
static const uint8_t DEFAULT_LG_K = 12;
|
|
99
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Creates and instance of the builder with default parameters.
|
|
103
|
+
*/
|
|
104
|
+
theta_base_builder(const Allocator& allocator);
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Set log2(k), where k is a nominal number of entries in the sketch
|
|
108
|
+
* @param lg_k base 2 logarithm of nominal number of entries
|
|
109
|
+
* @return this builder
|
|
110
|
+
*/
|
|
111
|
+
Derived& set_lg_k(uint8_t lg_k);
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Set resize factor for the internal hash table (defaults to 8)
|
|
115
|
+
* @param rf resize factor
|
|
116
|
+
* @return this builder
|
|
117
|
+
*/
|
|
118
|
+
Derived& set_resize_factor(resize_factor rf);
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Set sampling probability (initial theta). The default is 1, so the sketch retains
|
|
122
|
+
* all entries until it reaches the limit, at which point it goes into the estimation mode
|
|
123
|
+
* and reduces the effective sampling probability (theta) as necessary.
|
|
124
|
+
* @param p sampling probability
|
|
125
|
+
* @return this builder
|
|
126
|
+
*/
|
|
127
|
+
Derived& set_p(float p);
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Set the seed for the hash function. Should be used carefully if needed.
|
|
131
|
+
* Sketches produced with different seed are not compatible
|
|
132
|
+
* and cannot be mixed in set operations.
|
|
133
|
+
* @param seed hash seed
|
|
134
|
+
* @return this builder
|
|
135
|
+
*/
|
|
136
|
+
Derived& set_seed(uint64_t seed);
|
|
137
|
+
|
|
138
|
+
protected:
|
|
139
|
+
Allocator allocator_;
|
|
140
|
+
uint8_t lg_k_;
|
|
141
|
+
resize_factor rf_;
|
|
142
|
+
float p_;
|
|
143
|
+
uint64_t seed_;
|
|
144
|
+
|
|
145
|
+
uint64_t starting_theta() const;
|
|
146
|
+
uint8_t starting_lg_size() const;
|
|
147
|
+
static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
// key extractors
|
|
151
|
+
|
|
152
|
+
// Key extractor for entries that are their own key: hands back its argument
// unchanged, keeping its value category and constness.
struct trivial_extract_key {
  template<typename T>
  T&& operator()(T&& entry) const {
    return std::forward<T>(entry);
  }
};
|
|
158
|
+
|
|
159
|
+
// Key extractor for std::pair<K, V> entries: the key is the pair's first member.
template<typename K, typename V>
struct pair_extract_key {
  // mutable access to the key of a mutable pair
  auto operator()(std::pair<K, V>& entry) const -> K& {
    return entry.first;
  }

  // read-only access to the key of a const pair
  auto operator()(const std::pair<K, V>& entry) const -> const K& {
    return entry.first;
  }
};
|
|
168
|
+
|
|
169
|
+
// not zero
|
|
170
|
+
|
|
171
|
+
// Predicate: true when the key that ExtractKey yields for an entry is not zero.
template<typename Entry, typename ExtractKey>
class key_not_zero {
public:
  bool operator()(const Entry& entry) const {
    const auto& extracted = ExtractKey()(entry);
    return extracted != 0;
  }
};
|
|
178
|
+
|
|
179
|
+
// Predicate: true when the key that ExtractKey yields for an entry is not zero
// and is strictly less than the key given at construction.
template<typename Key, typename Entry, typename ExtractKey>
class key_not_zero_less_than {
public:
  explicit key_not_zero_less_than(const Key& key): limit_(key) {}

  bool operator()(const Entry& entry) const {
    const auto& extracted = ExtractKey()(entry);
    if (extracted == 0) return false;
    return extracted < limit_;
  }

private:
  Key limit_; // exclusive upper bound for accepted keys
};
|
|
189
|
+
|
|
190
|
+
// MurmurHash3 hash functions
|
|
191
|
+
|
|
192
|
+
static inline uint64_t compute_hash(const void* data, size_t length, uint64_t seed) {
|
|
193
|
+
HashState hashes;
|
|
194
|
+
MurmurHash3_x64_128(data, length, seed, hashes);
|
|
195
|
+
return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// Derives a 16-bit hash of the seed: runs MurmurHash3_x64_128 (hash seed 0)
// over the bytes of `seed` and keeps the low 16 bits of h1.
static inline uint16_t compute_seed_hash(uint64_t seed) {
  HashState hashes;
  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
  // explicit truncation: same value as the implicit uint64_t -> uint16_t
  // conversion, but without a narrowing warning
  return static_cast<uint16_t>(hashes.h1 & 0xffff);
}
|
|
203
|
+
|
|
204
|
+
// iterators
|
|
205
|
+
|
|
206
|
+
// Input iterator over an array of entries, described by a pointer, a size and
// a current index. Only declarations live here; the members are defined out of line.
// NOTE(review): ExtractKey is not used in any declaration below - presumably the
// out-of-line definitions use it to inspect entry keys; confirm in the impl file.
template<typename Entry, typename ExtractKey>
class theta_iterator {
public:
  // Iterator traits are spelled out directly because inheriting from
  // std::iterator is deprecated since C++17. The types are the same ones
  // std::iterator<std::input_iterator_tag, Entry> provided.
  using iterator_category = std::input_iterator_tag;
  using value_type = Entry;
  using difference_type = std::ptrdiff_t;
  using pointer = Entry*;
  using reference = Entry&;

  theta_iterator(Entry* entries, uint32_t size, uint32_t index);
  theta_iterator& operator++();
  theta_iterator operator++(int);
  bool operator==(const theta_iterator& other) const;
  bool operator!=(const theta_iterator& other) const;
  Entry& operator*() const;

private:
  Entry* entries_;
  uint32_t size_;
  uint32_t index_;
};
|
|
221
|
+
|
|
222
|
+
// Read-only input iterator over an array of entries, described by a pointer, a
// size and a current index. Only declarations live here; the members are defined out of line.
// NOTE(review): ExtractKey is not used in any declaration below - presumably the
// out-of-line definitions use it to inspect entry keys; confirm in the impl file.
template<typename Entry, typename ExtractKey>
class theta_const_iterator {
public:
  // Iterator traits are spelled out directly because inheriting from
  // std::iterator is deprecated since C++17. pointer and reference are
  // const-qualified so that they agree with what operator*() returns.
  using iterator_category = std::input_iterator_tag;
  using value_type = Entry;
  using difference_type = std::ptrdiff_t;
  using pointer = const Entry*;
  using reference = const Entry&;

  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
  theta_const_iterator& operator++();
  theta_const_iterator operator++(int);
  bool operator==(const theta_const_iterator& other) const;
  bool operator!=(const theta_const_iterator& other) const;
  const Entry& operator*() const;

private:
  const Entry* entries_;
  uint32_t size_;
  uint32_t index_;
};
|
|
237
|
+
|
|
238
|
+
// double value canonicalization for compatibility with Java
|
|
239
|
+
// Returns the 64-bit pattern of a double in canonical form:
//  - both +0.0 and -0.0 give the bits of +0.0
//  - every NaN gives 0x7ff8000000000000, the value from Java's Double.doubleToLongBits()
//  - any other value gives its own bit pattern
static inline int64_t canonical_double(double value) {
  static_assert(sizeof(double) == sizeof(int64_t), "double is expected to be 64 bits wide");

  if (std::isnan(value)) {
    return 0x7ff8000000000000LL; // canonicalize NaN
  }
  if (value == 0.0) {
    value = 0.0; // canonicalize -0.0 to 0.0
  }
  // copy the bytes instead of reading an inactive union member, which is
  // undefined behavior in C++
  int64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}
|
|
254
|
+
|
|
255
|
+
} /* namespace datasketches */
|
|
256
|
+
|
|
257
|
+
#include "theta_update_sketch_base_impl.hpp"
|
|
258
|
+
|
|
259
|
+
#endif
|