datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
#include <sstream>
|
|
22
|
+
#include <algorithm>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
template<typename EN, typename EK, typename A>
|
|
27
|
+
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
|
|
28
|
+
allocator_(allocator),
|
|
29
|
+
is_empty_(is_empty),
|
|
30
|
+
lg_cur_size_(lg_cur_size),
|
|
31
|
+
lg_nom_size_(lg_nom_size),
|
|
32
|
+
rf_(rf),
|
|
33
|
+
num_entries_(0),
|
|
34
|
+
theta_(theta),
|
|
35
|
+
seed_(seed),
|
|
36
|
+
entries_(nullptr)
|
|
37
|
+
{
|
|
38
|
+
if (lg_cur_size > 0) {
|
|
39
|
+
const size_t size = 1 << lg_cur_size;
|
|
40
|
+
entries_ = allocator_.allocate(size);
|
|
41
|
+
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
template<typename EN, typename EK, typename A>
|
|
46
|
+
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(const theta_update_sketch_base& other):
|
|
47
|
+
allocator_(other.allocator_),
|
|
48
|
+
is_empty_(other.is_empty_),
|
|
49
|
+
lg_cur_size_(other.lg_cur_size_),
|
|
50
|
+
lg_nom_size_(other.lg_nom_size_),
|
|
51
|
+
rf_(other.rf_),
|
|
52
|
+
num_entries_(other.num_entries_),
|
|
53
|
+
theta_(other.theta_),
|
|
54
|
+
seed_(other.seed_),
|
|
55
|
+
entries_(nullptr)
|
|
56
|
+
{
|
|
57
|
+
if (other.entries_ != nullptr) {
|
|
58
|
+
const size_t size = 1 << lg_cur_size_;
|
|
59
|
+
entries_ = allocator_.allocate(size);
|
|
60
|
+
for (size_t i = 0; i < size; ++i) {
|
|
61
|
+
if (EK()(other.entries_[i]) != 0) {
|
|
62
|
+
new (&entries_[i]) EN(other.entries_[i]);
|
|
63
|
+
} else {
|
|
64
|
+
EK()(entries_[i]) = 0;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
template<typename EN, typename EK, typename A>
|
|
71
|
+
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
|
|
72
|
+
allocator_(other.allocator_),
|
|
73
|
+
is_empty_(other.is_empty_),
|
|
74
|
+
lg_cur_size_(other.lg_cur_size_),
|
|
75
|
+
lg_nom_size_(other.lg_nom_size_),
|
|
76
|
+
rf_(other.rf_),
|
|
77
|
+
num_entries_(other.num_entries_),
|
|
78
|
+
theta_(other.theta_),
|
|
79
|
+
seed_(other.seed_),
|
|
80
|
+
entries_(other.entries_)
|
|
81
|
+
{
|
|
82
|
+
other.entries_ = nullptr;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
template<typename EN, typename EK, typename A>
|
|
86
|
+
theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
|
|
87
|
+
{
|
|
88
|
+
if (entries_ != nullptr) {
|
|
89
|
+
const size_t size = 1 << lg_cur_size_;
|
|
90
|
+
for (size_t i = 0; i < size; ++i) {
|
|
91
|
+
if (EK()(entries_[i]) != 0) entries_[i].~EN();
|
|
92
|
+
}
|
|
93
|
+
allocator_.deallocate(entries_, size);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
template<typename EN, typename EK, typename A>
|
|
98
|
+
theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(const theta_update_sketch_base& other) {
|
|
99
|
+
theta_update_sketch_base<EN, EK, A> copy(other);
|
|
100
|
+
std::swap(allocator_, copy.allocator_);
|
|
101
|
+
std::swap(is_empty_, copy.is_empty_);
|
|
102
|
+
std::swap(lg_cur_size_, copy.lg_cur_size_);
|
|
103
|
+
std::swap(lg_nom_size_, copy.lg_nom_size_);
|
|
104
|
+
std::swap(rf_, copy.rf_);
|
|
105
|
+
std::swap(num_entries_, copy.num_entries_);
|
|
106
|
+
std::swap(theta_, copy.theta_);
|
|
107
|
+
std::swap(seed_, copy.seed_);
|
|
108
|
+
std::swap(entries_, copy.entries_);
|
|
109
|
+
return *this;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
template<typename EN, typename EK, typename A>
|
|
113
|
+
theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(theta_update_sketch_base&& other) {
|
|
114
|
+
std::swap(allocator_, other.allocator_);
|
|
115
|
+
std::swap(is_empty_, other.is_empty_);
|
|
116
|
+
std::swap(lg_cur_size_, other.lg_cur_size_);
|
|
117
|
+
std::swap(lg_nom_size_, other.lg_nom_size_);
|
|
118
|
+
std::swap(rf_, other.rf_);
|
|
119
|
+
std::swap(num_entries_, other.num_entries_);
|
|
120
|
+
std::swap(theta_, other.theta_);
|
|
121
|
+
std::swap(seed_, other.seed_);
|
|
122
|
+
std::swap(entries_, other.entries_);
|
|
123
|
+
return *this;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
template<typename EN, typename EK, typename A>
|
|
127
|
+
uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data, size_t length) {
|
|
128
|
+
is_empty_ = false;
|
|
129
|
+
const uint64_t hash = compute_hash(data, length, seed_);
|
|
130
|
+
if (hash >= theta_) return 0; // hash == 0 is reserved to mark empty slots in the table
|
|
131
|
+
return hash;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
template<typename EN, typename EK, typename A>
|
|
135
|
+
auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
|
|
136
|
+
const size_t size = 1 << lg_cur_size_;
|
|
137
|
+
const size_t mask = size - 1;
|
|
138
|
+
const uint32_t stride = get_stride(key, lg_cur_size_);
|
|
139
|
+
uint32_t index = static_cast<uint32_t>(key) & mask;
|
|
140
|
+
// search for duplicate or zero
|
|
141
|
+
const uint32_t loop_index = index;
|
|
142
|
+
do {
|
|
143
|
+
const uint64_t probe = EK()(entries_[index]);
|
|
144
|
+
if (probe == 0) {
|
|
145
|
+
return std::pair<iterator, bool>(&entries_[index], false);
|
|
146
|
+
} else if (probe == key) {
|
|
147
|
+
return std::pair<iterator, bool>(&entries_[index], true);
|
|
148
|
+
}
|
|
149
|
+
index = (index + stride) & mask;
|
|
150
|
+
} while (index != loop_index);
|
|
151
|
+
throw std::logic_error("key not found and no empty slots!");
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
template<typename EN, typename EK, typename A>
|
|
155
|
+
template<typename Fwd>
|
|
156
|
+
void theta_update_sketch_base<EN, EK, A>::insert(iterator it, Fwd&& entry) {
|
|
157
|
+
new (it) EN(std::forward<Fwd>(entry));
|
|
158
|
+
++num_entries_;
|
|
159
|
+
if (num_entries_ > get_capacity(lg_cur_size_, lg_nom_size_)) {
|
|
160
|
+
if (lg_cur_size_ <= lg_nom_size_) {
|
|
161
|
+
resize();
|
|
162
|
+
} else {
|
|
163
|
+
rebuild();
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
template<typename EN, typename EK, typename A>
|
|
169
|
+
auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
|
|
170
|
+
return entries_;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
template<typename EN, typename EK, typename A>
|
|
174
|
+
auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
|
|
175
|
+
return &entries_[1 << lg_cur_size_];
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
template<typename EN, typename EK, typename A>
|
|
179
|
+
uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
|
|
180
|
+
const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
|
|
181
|
+
return std::floor(fraction * (1 << lg_cur_size));
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
template<typename EN, typename EK, typename A>
|
|
185
|
+
uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t lg_size) {
|
|
186
|
+
// odd and independent of index assuming lg_size lowest bits of the key were used for the index
|
|
187
|
+
return (2 * static_cast<uint32_t>((key >> lg_size) & STRIDE_MASK)) + 1;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
template<typename EN, typename EK, typename A>
|
|
191
|
+
void theta_update_sketch_base<EN, EK, A>::resize() {
|
|
192
|
+
const size_t old_size = 1 << lg_cur_size_;
|
|
193
|
+
const uint8_t lg_tgt_size = lg_nom_size_ + 1;
|
|
194
|
+
const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
|
|
195
|
+
lg_cur_size_ += factor;
|
|
196
|
+
const size_t new_size = 1 << lg_cur_size_;
|
|
197
|
+
EN* old_entries = entries_;
|
|
198
|
+
entries_ = allocator_.allocate(new_size);
|
|
199
|
+
for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
|
|
200
|
+
num_entries_ = 0;
|
|
201
|
+
for (size_t i = 0; i < old_size; ++i) {
|
|
202
|
+
const uint64_t key = EK()(old_entries[i]);
|
|
203
|
+
if (key != 0) {
|
|
204
|
+
insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
|
|
205
|
+
old_entries[i].~EN();
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
allocator_.deallocate(old_entries, old_size);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// assumes number of entries > nominal size
|
|
212
|
+
template<typename EN, typename EK, typename A>
|
|
213
|
+
void theta_update_sketch_base<EN, EK, A>::rebuild() {
|
|
214
|
+
const size_t size = 1 << lg_cur_size_;
|
|
215
|
+
const uint32_t nominal_size = 1 << lg_nom_size_;
|
|
216
|
+
|
|
217
|
+
// empty entries have uninitialized payloads
|
|
218
|
+
// TODO: avoid this for empty or trivial payloads (arithmetic types)
|
|
219
|
+
consolidate_non_empty(entries_, size, num_entries_);
|
|
220
|
+
|
|
221
|
+
std::nth_element(entries_, entries_ + nominal_size, entries_ + num_entries_, comparator());
|
|
222
|
+
this->theta_ = EK()(entries_[nominal_size]);
|
|
223
|
+
EN* old_entries = entries_;
|
|
224
|
+
const size_t num_old_entries = num_entries_;
|
|
225
|
+
entries_ = allocator_.allocate(size);
|
|
226
|
+
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
|
227
|
+
num_entries_ = 0;
|
|
228
|
+
// relies on consolidating non-empty entries to the front
|
|
229
|
+
for (size_t i = 0; i < nominal_size; ++i) {
|
|
230
|
+
insert(find(EK()(old_entries[i])).first, std::move(old_entries[i])); // consider a special insert with no comparison
|
|
231
|
+
old_entries[i].~EN();
|
|
232
|
+
}
|
|
233
|
+
for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
|
|
234
|
+
allocator_.deallocate(old_entries, size);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
template<typename EN, typename EK, typename A>
|
|
238
|
+
void theta_update_sketch_base<EN, EK, A>::trim() {
|
|
239
|
+
if (num_entries_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
template<typename EN, typename EK, typename A>
|
|
243
|
+
void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, size_t size, size_t num) {
|
|
244
|
+
// find the first empty slot
|
|
245
|
+
size_t i = 0;
|
|
246
|
+
while (i < size) {
|
|
247
|
+
if (EK()(entries[i]) == 0) break;
|
|
248
|
+
++i;
|
|
249
|
+
}
|
|
250
|
+
// scan the rest and move non-empty entries to the front
|
|
251
|
+
for (size_t j = i + 1; j < size; ++j) {
|
|
252
|
+
if (EK()(entries[j]) != 0) {
|
|
253
|
+
new (&entries[i]) EN(std::move(entries[j]));
|
|
254
|
+
entries[j].~EN();
|
|
255
|
+
EK()(entries[j]) = 0;
|
|
256
|
+
++i;
|
|
257
|
+
if (i == num) break;
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// builder
|
|
263
|
+
|
|
264
|
+
template<typename Derived, typename Allocator>
|
|
265
|
+
theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
|
|
266
|
+
allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
|
|
267
|
+
|
|
268
|
+
template<typename Derived, typename Allocator>
|
|
269
|
+
Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
|
|
270
|
+
if (lg_k < MIN_LG_K) {
|
|
271
|
+
throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
|
|
272
|
+
}
|
|
273
|
+
if (lg_k > MAX_LG_K) {
|
|
274
|
+
throw std::invalid_argument("lg_k must not be greater than " + std::to_string(MAX_LG_K) + ": " + std::to_string(lg_k));
|
|
275
|
+
}
|
|
276
|
+
lg_k_ = lg_k;
|
|
277
|
+
return static_cast<Derived&>(*this);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
template<typename Derived, typename Allocator>
|
|
281
|
+
Derived& theta_base_builder<Derived, Allocator>::set_resize_factor(resize_factor rf) {
|
|
282
|
+
rf_ = rf;
|
|
283
|
+
return static_cast<Derived&>(*this);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
template<typename Derived, typename Allocator>
|
|
287
|
+
Derived& theta_base_builder<Derived, Allocator>::set_p(float p) {
|
|
288
|
+
if (p <= 0 || p > 1) throw std::invalid_argument("sampling probability must be between 0 and 1");
|
|
289
|
+
p_ = p;
|
|
290
|
+
return static_cast<Derived&>(*this);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
template<typename Derived, typename Allocator>
|
|
294
|
+
Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
|
|
295
|
+
seed_ = seed;
|
|
296
|
+
return static_cast<Derived&>(*this);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
template<typename Derived, typename Allocator>
|
|
300
|
+
uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
|
|
301
|
+
if (p_ < 1) return theta_constants::MAX_THETA * p_;
|
|
302
|
+
return theta_constants::MAX_THETA;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
template<typename Derived, typename Allocator>
|
|
306
|
+
uint8_t theta_base_builder<Derived, Allocator>::starting_lg_size() const {
|
|
307
|
+
return starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
template<typename Derived, typename Allocator>
|
|
311
|
+
uint8_t theta_base_builder<Derived, Allocator>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
|
|
312
|
+
return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// iterator
|
|
316
|
+
|
|
317
|
+
template<typename Entry, typename ExtractKey>
|
|
318
|
+
theta_iterator<Entry, ExtractKey>::theta_iterator(Entry* entries, uint32_t size, uint32_t index):
|
|
319
|
+
entries_(entries), size_(size), index_(index) {
|
|
320
|
+
while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
template<typename Entry, typename ExtractKey>
|
|
324
|
+
auto theta_iterator<Entry, ExtractKey>::operator++() -> theta_iterator& {
|
|
325
|
+
++index_;
|
|
326
|
+
while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
|
|
327
|
+
return *this;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
template<typename Entry, typename ExtractKey>
|
|
331
|
+
auto theta_iterator<Entry, ExtractKey>::operator++(int) -> theta_iterator {
|
|
332
|
+
theta_iterator tmp(*this);
|
|
333
|
+
operator++();
|
|
334
|
+
return tmp;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
template<typename Entry, typename ExtractKey>
|
|
338
|
+
bool theta_iterator<Entry, ExtractKey>::operator!=(const theta_iterator& other) const {
|
|
339
|
+
return index_ != other.index_;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
template<typename Entry, typename ExtractKey>
|
|
343
|
+
bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other) const {
|
|
344
|
+
return index_ == other.index_;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
template<typename Entry, typename ExtractKey>
|
|
348
|
+
auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
|
|
349
|
+
return entries_[index_];
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// const iterator
|
|
353
|
+
|
|
354
|
+
template<typename Entry, typename ExtractKey>
|
|
355
|
+
theta_const_iterator<Entry, ExtractKey>::theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index):
|
|
356
|
+
entries_(entries), size_(size), index_(index) {
|
|
357
|
+
while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
template<typename Entry, typename ExtractKey>
|
|
361
|
+
auto theta_const_iterator<Entry, ExtractKey>::operator++() -> theta_const_iterator& {
|
|
362
|
+
++index_;
|
|
363
|
+
while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
|
|
364
|
+
return *this;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
template<typename Entry, typename ExtractKey>
|
|
368
|
+
auto theta_const_iterator<Entry, ExtractKey>::operator++(int) -> theta_const_iterator {
|
|
369
|
+
theta_const_iterator tmp(*this);
|
|
370
|
+
operator++();
|
|
371
|
+
return tmp;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
template<typename Entry, typename ExtractKey>
|
|
375
|
+
bool theta_const_iterator<Entry, ExtractKey>::operator!=(const theta_const_iterator& other) const {
|
|
376
|
+
return index_ != other.index_;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
template<typename Entry, typename ExtractKey>
|
|
380
|
+
bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_iterator& other) const {
|
|
381
|
+
return index_ == other.index_;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
template<typename Entry, typename ExtractKey>
|
|
385
|
+
auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry& {
|
|
386
|
+
return entries_[index_];
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef TUPLE_A_NOT_B_HPP_
|
|
21
|
+
#define TUPLE_A_NOT_B_HPP_
|
|
22
|
+
|
|
23
|
+
#include "tuple_sketch.hpp"
|
|
24
|
+
#include "theta_set_difference_base.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
template<
|
|
29
|
+
typename Summary,
|
|
30
|
+
typename Allocator = std::allocator<Summary>
|
|
31
|
+
>
|
|
32
|
+
class tuple_a_not_b {
|
|
33
|
+
public:
|
|
34
|
+
using Entry = std::pair<uint64_t, Summary>;
|
|
35
|
+
using ExtractKey = pair_extract_key<uint64_t, Summary>;
|
|
36
|
+
using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
|
|
37
|
+
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
|
|
38
|
+
using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, AllocEntry>;
|
|
39
|
+
|
|
40
|
+
explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Computes the a-not-b set operation given two sketches.
|
|
44
|
+
* @return the result of a-not-b
|
|
45
|
+
*/
|
|
46
|
+
template<typename FwdSketch, typename Sketch>
|
|
47
|
+
CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
|
|
48
|
+
|
|
49
|
+
private:
|
|
50
|
+
State state_;
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
} /* namespace datasketches */
|
|
54
|
+
|
|
55
|
+
#include "tuple_a_not_b_impl.hpp"
|
|
56
|
+
|
|
57
|
+
#endif
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
namespace datasketches {
|
|
21
|
+
|
|
22
|
+
template<typename S, typename A>
|
|
23
|
+
tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed, const A& allocator):
|
|
24
|
+
state_(seed, allocator)
|
|
25
|
+
{}
|
|
26
|
+
|
|
27
|
+
template<typename S, typename A>
|
|
28
|
+
template<typename FwdSketch, typename Sketch>
|
|
29
|
+
auto tuple_a_not_b<S, A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
|
|
30
|
+
return state_.compute(std::forward<FwdSketch>(a), b, ordered);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
} /* namespace datasketches */
|