datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef THETA_UNION_HPP_
|
|
21
|
+
#define THETA_UNION_HPP_
|
|
22
|
+
|
|
23
|
+
#include <memory>
|
|
24
|
+
#include <functional>
|
|
25
|
+
#include <climits>
|
|
26
|
+
|
|
27
|
+
#include "theta_sketch.hpp"
|
|
28
|
+
|
|
29
|
+
namespace datasketches {
|
|
30
|
+
|
|
31
|
+
/*
|
|
32
|
+
* author Alexander Saydakov
|
|
33
|
+
* author Lee Rhodes
|
|
34
|
+
* author Kevin Lang
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
// Set operation that merges theta sketches and yields their union as a compact sketch.
template<typename A>
|
|
38
|
+
class theta_union_alloc {
|
|
39
|
+
public:
|
|
40
|
+
class builder;
|
|
41
|
+
|
|
|
42
|
+
// No constructor here. Use builder instead.
|
|
43
|
+
|
|
|
44
|
+
/**
|
|
45
|
+
* Merges the given sketch into the union. An empty sketch is ignored.
|
|
46
|
+
* @param sketch to update the union with; std::invalid_argument is thrown on seed hash mismatch
|
|
47
|
+
*/
|
|
48
|
+
void update(const theta_sketch_alloc<A>& sketch);
|
|
49
|
+
|
|
|
50
|
+
/**
|
|
51
|
+
* This method produces a copy of the current state of the union as a compact sketch.
|
|
52
|
+
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
53
|
+
* @return the result of the union
|
|
54
|
+
*/
|
|
55
|
+
compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
|
|
56
|
+
|
|
|
57
|
+
private:
|
|
58
|
+
bool is_empty_; // true until the first non-empty sketch is merged
|
|
59
|
+
uint64_t theta_; // lowest theta seen so far: initial value, merged sketches, internal state
|
|
60
|
+
update_theta_sketch_alloc<A> state_; // update sketch that receives the hashes of merged sketches
|
|
61
|
+
|
|
|
62
|
+
// for builder: called from builder::build(), moves the given state into the union
|
|
63
|
+
theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state);
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
// builder
|
|
67
|
+
|
|
68
|
+
// Collects union parameters by forwarding them to an update sketch builder, then creates the union.
template<typename A>
|
|
69
|
+
class theta_union_alloc<A>::builder {
|
|
70
|
+
public:
|
|
71
|
+
typedef typename update_theta_sketch_alloc<A>::resize_factor resize_factor; // same type the update sketch uses
|
|
72
|
+
|
|
|
73
|
+
/**
|
|
74
|
+
* Set log2(k), where k is a nominal number of entries in the sketch
|
|
75
|
+
* @param lg_k base 2 logarithm of nominal number of entries
|
|
76
|
+
* @return this builder
|
|
77
|
+
*/
|
|
78
|
+
builder& set_lg_k(uint8_t lg_k);
|
|
79
|
+
|
|
|
80
|
+
/**
|
|
81
|
+
* Set resize factor for the internal hash table (defaults to 8)
|
|
82
|
+
* @param rf resize factor
|
|
83
|
+
* @return this builder
|
|
84
|
+
*/
|
|
85
|
+
builder& set_resize_factor(resize_factor rf);
|
|
86
|
+
|
|
|
87
|
+
/**
|
|
88
|
+
* Set sampling probability (initial theta). The default is 1, so the sketch retains
|
|
89
|
+
* all entries until it reaches the limit, at which point it goes into the estimation mode
|
|
90
|
+
* and reduces the effective sampling probability (theta) as necessary.
|
|
91
|
+
* @param p sampling probability
|
|
92
|
+
* @return this builder
|
|
93
|
+
*/
|
|
94
|
+
builder& set_p(float p);
|
|
95
|
+
|
|
|
96
|
+
/**
|
|
97
|
+
* Set the seed for the hash function. Should be used carefully if needed.
|
|
98
|
+
* Sketches produced with different seeds are not compatible
|
|
99
|
+
* and cannot be mixed in set operations.
|
|
100
|
+
* @param seed hash seed
|
|
101
|
+
* @return this builder
|
|
102
|
+
*/
|
|
103
|
+
builder& set_seed(uint64_t seed);
|
|
104
|
+
|
|
|
105
|
+
/**
|
|
106
|
+
* This is to create an instance of the union with predefined parameters.
|
|
107
|
+
* @return an instance of the union
|
|
108
|
+
*/
|
|
109
|
+
theta_union_alloc<A> build() const;
|
|
110
|
+
|
|
|
111
|
+
private:
|
|
112
|
+
typename update_theta_sketch_alloc<A>::builder sketch_builder; // every setter above forwards to this builder
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
// alias with default allocator for convenience
|
|
116
|
+
typedef theta_union_alloc<std::allocator<void>> theta_union;
|
|
117
|
+
|
|
118
|
+
} /* namespace datasketches */
|
|
119
|
+
|
|
120
|
+
#include "theta_union_impl.hpp"
|
|
121
|
+
|
|
122
|
+
# endif
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef THETA_UNION_IMPL_HPP_
|
|
21
|
+
#define THETA_UNION_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
namespace datasketches {
|
|
24
|
+
|
|
25
|
+
/*
|
|
26
|
+
* author Alexander Saydakov
|
|
27
|
+
* author Lee Rhodes
|
|
28
|
+
* author Kevin Lang
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// Creates a union that is flagged empty, starts from the given theta and
// moves the supplied update sketch into its internal state.
template<typename A>
|
|
32
|
+
theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
|
|
33
|
+
is_empty_(true), theta_(theta), state_(std::move(state)) {}
|
|
34
|
+
|
|
35
|
+
// Merges one sketch into the union: lowers theta_ to the input's theta if that is smaller,
// then feeds every input hash below theta_ into the internal state.
// Throws std::invalid_argument when a non-empty input has a different seed hash.
template<typename A>
|
|
36
|
+
void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
|
|
37
|
+
if (sketch.is_empty()) return; // empty input changes nothing; note this returns before the seed check
|
|
38
|
+
if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
|
|
39
|
+
is_empty_ = false;
|
|
40
|
+
if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64(); // keep the minimum theta across inputs
|
|
41
|
+
if (sketch.is_ordered()) {
|
|
42
|
+
for (auto hash: sketch) {
|
|
43
|
+
if (hash >= theta_) break; // early stop: assumes an ordered sketch iterates in ascending hash order (confirm in theta_sketch)
|
|
44
|
+
state_.internal_update(hash);
|
|
45
|
+
}
|
|
46
|
+
} else {
|
|
47
|
+
for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash); // unordered input: every hash is checked
|
|
48
|
+
}
|
|
49
|
+
if (state_.get_theta64() < theta_) theta_ = state_.get_theta64(); // pick up the state's theta if it is now lower
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Returns the current union as a compact sketch without modifying the union.
// Keys at or above the effective theta are dropped, and if more than k = 2^lg_nom_size_
// keys remain, only the k smallest are kept and theta is lowered accordingly.
// The keys are sorted when `ordered` is true.
template<typename A>
|
|
53
|
+
compact_theta_sketch_alloc<A> theta_union_alloc<A>::get_result(bool ordered) const {
|
|
54
|
+
if (is_empty_) return state_.compact(ordered); // no non-empty sketch merged yet: compact the untouched state
|
|
55
|
+
const uint32_t nom_num_keys = 1 << state_.lg_nom_size_; // nominal number of entries k
|
|
56
|
+
if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered); // state needs no filtering or trimming
|
|
57
|
+
uint64_t theta = std::min(theta_, state_.get_theta64()); // effective theta: the lower of the union's and the state's
|
|
58
|
+
vector_u64<A> keys(state_.get_num_retained()); // sized for the case where every retained key is kept
|
|
59
|
+
uint32_t num_keys = 0;
|
|
60
|
+
for (auto key: state_) {
|
|
61
|
+
if (key < theta) keys[num_keys++] = key; // copy only keys below the effective theta
|
|
62
|
+
}
|
|
63
|
+
if (num_keys > nom_num_keys) {
|
|
64
|
+
std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys); // puts the (k+1)-th smallest key at index k
|
|
65
|
+
theta = keys[nom_num_keys]; // that key becomes the new theta
|
|
66
|
+
num_keys = nom_num_keys; // keep only the k keys placed before it
|
|
67
|
+
}
|
|
68
|
+
if (num_keys != state_.get_num_retained()) {
|
|
69
|
+
keys.resize(num_keys); // shrink to the number of keys actually kept
|
|
70
|
+
}
|
|
71
|
+
if (ordered) std::sort(keys.begin(), keys.end());
|
|
72
|
+
return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered); // first argument is presumably the is_empty flag (confirm against the compact sketch constructor)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// builder
|
|
76
|
+
|
|
77
|
+
// Forwards lg_k to the wrapped update sketch builder and returns this builder for chaining.
template<typename A>
|
|
78
|
+
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
|
|
79
|
+
sketch_builder.set_lg_k(lg_k);
|
|
80
|
+
return *this;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Forwards the resize factor to the wrapped update sketch builder and returns this builder for chaining.
template<typename A>
|
|
84
|
+
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_resize_factor(resize_factor rf) {
|
|
85
|
+
sketch_builder.set_resize_factor(rf);
|
|
86
|
+
return *this;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Forwards the sampling probability to the wrapped update sketch builder and returns this builder for chaining.
template<typename A>
|
|
90
|
+
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_p(float p) {
|
|
91
|
+
sketch_builder.set_p(p);
|
|
92
|
+
return *this;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Forwards the hash seed to the wrapped update sketch builder and returns this builder for chaining.
template<typename A>
|
|
96
|
+
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
|
|
97
|
+
sketch_builder.set_seed(seed);
|
|
98
|
+
return *this;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Builds an update sketch from the collected settings and wraps it in a union
// whose initial theta is that sketch's theta.
template<typename A>
|
|
102
|
+
theta_union_alloc<A> theta_union_alloc<A>::builder::build() const {
|
|
103
|
+
update_theta_sketch_alloc<A> sketch = sketch_builder.build();
|
|
104
|
+
return theta_union_alloc(sketch.get_theta64(), std::move(sketch)); // theta is read before the sketch is handed over as the union's state
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
} /* namespace datasketches */
|
|
108
|
+
|
|
109
|
+
# endif
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
add_executable(theta_test)
|
|
19
|
+
|
|
20
|
+
target_link_libraries(theta_test theta common_test)
|
|
21
|
+
|
|
22
|
+
set_target_properties(theta_test PROPERTIES
|
|
23
|
+
CXX_STANDARD 11
|
|
24
|
+
CXX_STANDARD_REQUIRED YES
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
|
|
28
|
+
string(APPEND THETA_TEST_BINARY_PATH "/")
|
|
29
|
+
target_compile_definitions(theta_test
|
|
30
|
+
PRIVATE
|
|
31
|
+
TEST_BINARY_INPUT_PATH="${THETA_TEST_BINARY_PATH}"
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
add_test(
|
|
35
|
+
NAME theta_test
|
|
36
|
+
COMMAND theta_test
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
target_sources(theta_test
|
|
40
|
+
PRIVATE
|
|
41
|
+
theta_sketch_test.cpp
|
|
42
|
+
theta_union_test.cpp
|
|
43
|
+
theta_intersection_test.cpp
|
|
44
|
+
theta_a_not_b_test.cpp
|
|
45
|
+
)
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
|
|
22
|
+
#include <theta_a_not_b.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// A-not-B of two freshly built (never updated) sketches must be an empty, exact-mode result with estimate 0.
TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
|
|
27
|
+
theta_a_not_b a_not_b;
|
|
28
|
+
update_theta_sketch a = update_theta_sketch::builder().build();
|
|
29
|
+
update_theta_sketch b = update_theta_sketch::builder().build();
|
|
30
|
+
compact_theta_sketch result = a_not_b.compute(a, b);
|
|
31
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
32
|
+
REQUIRE(result.is_empty());
|
|
33
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
34
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// A holds a single value; B is built with sampling probability 0.001.
// Checks the result first while B is empty, then after B has been updated but retains no entries.
TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
|
|
38
|
+
update_theta_sketch a = update_theta_sketch::builder().build();
|
|
39
|
+
a.update(1);
|
|
40
|
+
update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
|
|
41
|
+
theta_a_not_b a_not_b;
|
|
42
|
+
|
|
|
43
|
+
// B is still empty
|
|
44
|
+
compact_theta_sketch result = a_not_b.compute(a, b);
|
|
45
|
+
REQUIRE_FALSE(result.is_empty());
|
|
46
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
47
|
+
REQUIRE(result.get_num_retained() == 1);
|
|
48
|
+
REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
|
|
49
|
+
REQUIRE(result.get_estimate() == 1.0);
|
|
50
|
+
|
|
|
51
|
+
// B is not empty, is in estimation mode, and has no retained entries
|
|
52
|
+
b.update(1);
|
|
53
|
+
REQUIRE(b.get_num_retained() == 0U);
|
|
54
|
+
|
|
|
55
|
+
result = a_not_b.compute(a, b);
|
|
56
|
+
REQUIRE_FALSE(result.is_empty());
|
|
57
|
+
REQUIRE(result.is_estimation_mode());
|
|
58
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
59
|
+
REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10)); // result theta matches B's sampling probability
|
|
60
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// A holds values 0..999 and B holds 500..1499, so A-not-B must estimate exactly 500.
// The same inputs are checked as update sketches and as compact sketches, with and without
// a request for an ordered result.
TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") {
|
|
64
|
+
update_theta_sketch a = update_theta_sketch::builder().build();
|
|
65
|
+
int value = 0;
|
|
66
|
+
for (int i = 0; i < 1000; i++) a.update(value++);
|
|
67
|
+
|
|
|
68
|
+
update_theta_sketch b = update_theta_sketch::builder().build();
|
|
69
|
+
value = 500; // B starts halfway through A's range
|
|
70
|
+
for (int i = 0; i < 1000; i++) b.update(value++);
|
|
71
|
+
|
|
|
72
|
+
theta_a_not_b a_not_b;
|
|
73
|
+
|
|
|
74
|
+
// unordered inputs, ordered result
|
|
75
|
+
compact_theta_sketch result = a_not_b.compute(a, b);
|
|
76
|
+
REQUIRE_FALSE(result.is_empty());
|
|
77
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
78
|
+
REQUIRE(result.is_ordered());
|
|
79
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
80
|
+
|
|
|
81
|
+
// unordered inputs, unordered result
|
|
82
|
+
result = a_not_b.compute(a, b, false);
|
|
83
|
+
REQUIRE_FALSE(result.is_empty());
|
|
84
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
85
|
+
REQUIRE_FALSE(result.is_ordered());
|
|
86
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
87
|
+
|
|
|
88
|
+
// ordered inputs
|
|
89
|
+
result = a_not_b.compute(a.compact(), b.compact());
|
|
90
|
+
REQUIRE_FALSE(result.is_empty());
|
|
91
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
92
|
+
REQUIRE(result.is_ordered());
|
|
93
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
94
|
+
|
|
|
95
|
+
// A is ordered, so the result is ordered regardless
|
|
96
|
+
result = a_not_b.compute(a.compact(), b, false);
|
|
97
|
+
REQUIRE_FALSE(result.is_empty());
|
|
98
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
99
|
+
REQUIRE(result.is_ordered());
|
|
100
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// A holds keys [0, 1000) and B holds keys [1000, 2000): no key is shared, so
// the difference is expected to keep all 1000 keys of A, exactly.
TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") {
  update_theta_sketch lhs = update_theta_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) lhs.update(key);

  // B continues the key sequence where A stopped.
  update_theta_sketch rhs = update_theta_sketch::builder().build();
  for (int key = 1000; key < 2000; ++key) rhs.update(key);

  theta_a_not_b difference;

  // Update sketches passed directly (unordered inputs).
  compact_theta_sketch outcome = difference.compute(lhs, rhs);
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE_FALSE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 1000.0);

  // Compacted (ordered) inputs must give the same answer.
  outcome = difference.compute(lhs.compact(), rhs.compact());
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE_FALSE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 1000.0);
}
|
|
125
|
+
|
|
126
|
+
// Subtracting an exact-mode sketch (keys [0, 1000)) from itself: the result
// is asserted to be empty, exact, and to estimate zero.
TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") {
  update_theta_sketch source = update_theta_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) source.update(key);

  theta_a_not_b difference;

  // The same update sketch on both sides (unordered inputs).
  compact_theta_sketch outcome = difference.compute(source, source);
  REQUIRE(outcome.is_empty());
  REQUIRE_FALSE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 0.0);

  // Compacted (ordered) copies on both sides.
  outcome = difference.compute(source.compact(), source.compact());
  REQUIRE(outcome.is_empty());
  REQUIRE_FALSE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 0.0);
}
|
|
145
|
+
|
|
146
|
+
// A holds keys [0, 10000) and B holds keys [5000, 15000). The results are
// asserted to be in estimation mode, so the expected 5000 surviving keys are
// checked approximately (within 2%).
TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
  update_theta_sketch lhs = update_theta_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) lhs.update(key);

  update_theta_sketch rhs = update_theta_sketch::builder().build();
  for (int key = 5000; key < 15000; ++key) rhs.update(key);

  theta_a_not_b difference;
  const double tolerance = 5000 * 0.02;

  // Update sketches passed directly (unordered inputs).
  compact_theta_sketch outcome = difference.compute(lhs, rhs);
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(5000).margin(tolerance));

  // Compacted (ordered) inputs.
  outcome = difference.compute(lhs.compact(), rhs.compact());
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(5000).margin(tolerance));
}
|
|
169
|
+
|
|
170
|
+
// A holds keys [0, 10000) and B holds keys [10000, 20000): nothing is shared,
// so the estimate of A-not-B should stay close to 10000 (within 2%) while the
// result is reported as being in estimation mode.
TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
  update_theta_sketch lhs = update_theta_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) lhs.update(key);

  // B continues the key sequence where A stopped.
  update_theta_sketch rhs = update_theta_sketch::builder().build();
  for (int key = 10000; key < 20000; ++key) rhs.update(key);

  theta_a_not_b difference;
  const double tolerance = 10000 * 0.02;

  // Update sketches passed directly (unordered inputs).
  compact_theta_sketch outcome = difference.compute(lhs, rhs);
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(10000).margin(tolerance));

  // Compacted (ordered) inputs.
  outcome = difference.compute(lhs.compact(), rhs.compact());
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(10000).margin(tolerance));
}
|
|
192
|
+
|
|
193
|
+
// Subtracting an estimation-mode sketch (keys [0, 10000)) from itself.
// Unlike the exact-mode counterpart, the result is asserted to be non-empty
// and in estimation mode, with an estimate of exactly zero.
TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") {
  update_theta_sketch source = update_theta_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) source.update(key);

  theta_a_not_b difference;

  // The same update sketch on both sides (unordered inputs).
  compact_theta_sketch outcome = difference.compute(source, source);
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 0.0);

  // Compacted (ordered) copies on both sides.
  outcome = difference.compute(source.compact(), source.compact());
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == 0.0);
}
|
|
212
|
+
|
|
213
|
+
// The operator is constructed with 123 (going by the test title, a seed that
// differs from the one the builder-made sketch uses -- confirm against the
// theta_a_not_b constructor). compute() is then expected to reject the sketch
// with std::invalid_argument.
TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
  theta_a_not_b difference(123);

  update_theta_sketch source = update_theta_sketch::builder().build();
  // Make the sketch non-empty so that it is not ignored by the check.
  source.update(1);

  REQUIRE_THROWS_AS(difference.compute(source, source), std::invalid_argument);
}
|
|
219
|
+
|
|
220
|
+
// Scenario tagged as issue #152: A holds keys [0, 10000) while B is fed
// 25000 keys, [5000, 30000), so B is much larger than A and covers A's upper
// half. About 5000 keys of A should survive; the check allows 3% error.
TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
  update_theta_sketch lhs = update_theta_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) lhs.update(key);

  update_theta_sketch rhs = update_theta_sketch::builder().build();
  for (int key = 5000; key < 30000; ++key) rhs.update(key);

  theta_a_not_b difference;
  const double tolerance = 5000 * 0.03;

  // Update sketches passed directly (unordered inputs).
  compact_theta_sketch outcome = difference.compute(lhs, rhs);
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(5000).margin(tolerance));

  // Compacted (ordered) inputs.
  outcome = difference.compute(lhs.compact(), rhs.compact());
  REQUIRE_FALSE(outcome.is_empty());
  REQUIRE(outcome.is_estimation_mode());
  REQUIRE(outcome.get_estimate() == Approx(5000).margin(tolerance));
}
|
|
243
|
+
|
|
244
|
+
} /* namespace datasketches */
|