datasketches 0.1.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
- data/vendor/datasketches-cpp/python/README.md +52 -49
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
- data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
- data/vendor/datasketches-cpp/setup.py +11 -6
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
- metadata +51 -36
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,103 +20,70 @@
|
|
20
20
|
#ifndef THETA_UNION_HPP_
|
21
21
|
#define THETA_UNION_HPP_
|
22
22
|
|
23
|
-
#include
|
24
|
-
#include <functional>
|
25
|
-
#include <climits>
|
26
|
-
|
23
|
+
#include "serde.hpp"
|
27
24
|
#include "theta_sketch.hpp"
|
25
|
+
#include "theta_union_base.hpp"
|
28
26
|
|
29
27
|
namespace datasketches {
|
30
28
|
|
31
|
-
|
32
|
-
* author Alexander Saydakov
|
33
|
-
* author Lee Rhodes
|
34
|
-
* author Kevin Lang
|
35
|
-
*/
|
36
|
-
|
37
|
-
template<typename A>
|
29
|
+
template<typename Allocator = std::allocator<uint64_t>>
|
38
30
|
class theta_union_alloc {
|
39
31
|
public:
|
40
|
-
|
32
|
+
using Entry = uint64_t;
|
33
|
+
using ExtractKey = trivial_extract_key;
|
34
|
+
using Sketch = theta_sketch_alloc<Allocator>;
|
35
|
+
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
36
|
+
using resize_factor = theta_constants::resize_factor;
|
37
|
+
|
38
|
+
struct nop_policy {
|
39
|
+
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
40
|
+
unused(internal_entry);
|
41
|
+
unused(incoming_entry);
|
42
|
+
}
|
43
|
+
};
|
44
|
+
using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
|
41
45
|
|
42
46
|
// No constructor here. Use builder instead.
|
47
|
+
class builder;
|
43
48
|
|
44
49
|
/**
|
45
50
|
* This method is to update the union with a given sketch
|
46
51
|
* @param sketch to update the union with
|
47
52
|
*/
|
48
|
-
|
53
|
+
template<typename FwdSketch>
|
54
|
+
void update(FwdSketch&& sketch);
|
49
55
|
|
50
56
|
/**
|
51
57
|
* This method produces a copy of the current state of the union as a compact sketch.
|
52
58
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
53
59
|
* @return the result of the union
|
54
60
|
*/
|
55
|
-
|
61
|
+
CompactSketch get_result(bool ordered = true) const;
|
56
62
|
|
57
63
|
private:
|
58
|
-
|
59
|
-
uint64_t theta_;
|
60
|
-
update_theta_sketch_alloc<A> state_;
|
64
|
+
State state_;
|
61
65
|
|
62
66
|
// for builder
|
63
|
-
theta_union_alloc(uint64_t theta,
|
67
|
+
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
64
68
|
};
|
65
69
|
|
66
|
-
// builder
|
67
|
-
|
68
70
|
template<typename A>
|
69
|
-
class theta_union_alloc<A>::builder {
|
71
|
+
class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
|
70
72
|
public:
|
71
|
-
|
72
|
-
|
73
|
-
/**
|
74
|
-
* Set log2(k), where k is a nominal number of entries in the sketch
|
75
|
-
* @param lg_k base 2 logarithm of nominal number of entries
|
76
|
-
* @return this builder
|
77
|
-
*/
|
78
|
-
builder& set_lg_k(uint8_t lg_k);
|
79
|
-
|
80
|
-
/**
|
81
|
-
* Set resize factor for the internal hash table (defaults to 8)
|
82
|
-
* @param rf resize factor
|
83
|
-
* @return this builder
|
84
|
-
*/
|
85
|
-
builder& set_resize_factor(resize_factor rf);
|
86
|
-
|
87
|
-
/**
|
88
|
-
* Set sampling probability (initial theta). The default is 1, so the sketch retains
|
89
|
-
* all entries until it reaches the limit, at which point it goes into the estimation mode
|
90
|
-
* and reduces the effective sampling probability (theta) as necessary.
|
91
|
-
* @param p sampling probability
|
92
|
-
* @return this builder
|
93
|
-
*/
|
94
|
-
builder& set_p(float p);
|
95
|
-
|
96
|
-
/**
|
97
|
-
* Set the seed for the hash function. Should be used carefully if needed.
|
98
|
-
* Sketches produced with different seed are not compatible
|
99
|
-
* and cannot be mixed in set operations.
|
100
|
-
* @param seed hash seed
|
101
|
-
* @return this builder
|
102
|
-
*/
|
103
|
-
builder& set_seed(uint64_t seed);
|
73
|
+
builder(const A& allocator = A());
|
104
74
|
|
105
75
|
/**
|
106
76
|
* This is to create an instance of the union with predefined parameters.
|
107
|
-
* @return
|
77
|
+
* @return an instance of the union
|
108
78
|
*/
|
109
79
|
theta_union_alloc<A> build() const;
|
110
|
-
|
111
|
-
private:
|
112
|
-
typename update_theta_sketch_alloc<A>::builder sketch_builder;
|
113
80
|
};
|
114
81
|
|
115
82
|
// alias with default allocator for convenience
|
116
|
-
|
83
|
+
using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
|
117
84
|
|
118
85
|
} /* namespace datasketches */
|
119
86
|
|
120
87
|
#include "theta_union_impl.hpp"
|
121
88
|
|
122
|
-
#
|
89
|
+
#endif
|
@@ -17,6 +17,9 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
+
#ifndef THETA_UNION_BASE_IMPL_HPP_
|
21
|
+
#define THETA_UNION_BASE_IMPL_HPP_
|
22
|
+
|
20
23
|
#include <algorithm>
|
21
24
|
|
22
25
|
#include "conditional_forward.hpp"
|
@@ -40,7 +43,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
40
43
|
if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
|
41
44
|
for (auto& entry: sketch) {
|
42
45
|
const uint64_t hash = EK()(entry);
|
43
|
-
if (hash < union_theta_) {
|
46
|
+
if (hash < union_theta_ && hash < table_.theta_) {
|
44
47
|
auto result = table_.find(hash);
|
45
48
|
if (!result.second) {
|
46
49
|
table_.insert(result.first, conditional_forward<SS>(entry));
|
@@ -82,3 +85,5 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
|
|
82
85
|
}
|
83
86
|
|
84
87
|
} /* namespace datasketches */
|
88
|
+
|
89
|
+
#endif
|
@@ -22,86 +22,30 @@
|
|
22
22
|
|
23
23
|
namespace datasketches {
|
24
24
|
|
25
|
-
/*
|
26
|
-
* author Alexander Saydakov
|
27
|
-
* author Lee Rhodes
|
28
|
-
* author Kevin Lang
|
29
|
-
*/
|
30
|
-
|
31
|
-
template<typename A>
|
32
|
-
theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
|
33
|
-
is_empty_(true), theta_(theta), state_(std::move(state)) {}
|
34
|
-
|
35
|
-
template<typename A>
|
36
|
-
void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
|
37
|
-
if (sketch.is_empty()) return;
|
38
|
-
if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
|
39
|
-
is_empty_ = false;
|
40
|
-
if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64();
|
41
|
-
if (sketch.is_ordered()) {
|
42
|
-
for (auto hash: sketch) {
|
43
|
-
if (hash >= theta_) break; // early stop
|
44
|
-
state_.internal_update(hash);
|
45
|
-
}
|
46
|
-
} else {
|
47
|
-
for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash);
|
48
|
-
}
|
49
|
-
if (state_.get_theta64() < theta_) theta_ = state_.get_theta64();
|
50
|
-
}
|
51
|
-
|
52
25
|
template<typename A>
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered);
|
57
|
-
uint64_t theta = std::min(theta_, state_.get_theta64());
|
58
|
-
vector_u64<A> keys(state_.get_num_retained());
|
59
|
-
uint32_t num_keys = 0;
|
60
|
-
for (auto key: state_) {
|
61
|
-
if (key < theta) keys[num_keys++] = key;
|
62
|
-
}
|
63
|
-
if (num_keys > nom_num_keys) {
|
64
|
-
std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys);
|
65
|
-
theta = keys[nom_num_keys];
|
66
|
-
num_keys = nom_num_keys;
|
67
|
-
}
|
68
|
-
if (num_keys != state_.get_num_retained()) {
|
69
|
-
keys.resize(num_keys);
|
70
|
-
}
|
71
|
-
if (ordered) std::sort(keys.begin(), keys.end());
|
72
|
-
return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered);
|
73
|
-
}
|
74
|
-
|
75
|
-
// builder
|
26
|
+
theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
|
27
|
+
state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
|
28
|
+
{}
|
76
29
|
|
77
30
|
template<typename A>
|
78
|
-
typename
|
79
|
-
|
80
|
-
|
31
|
+
template<typename SS>
|
32
|
+
void theta_union_alloc<A>::update(SS&& sketch) {
|
33
|
+
state_.update(std::forward<SS>(sketch));
|
81
34
|
}
|
82
35
|
|
83
36
|
template<typename A>
|
84
|
-
|
85
|
-
|
86
|
-
return *this;
|
37
|
+
auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
|
38
|
+
return state_.get_result(ordered);
|
87
39
|
}
|
88
40
|
|
89
41
|
template<typename A>
|
90
|
-
|
91
|
-
sketch_builder.set_p(p);
|
92
|
-
return *this;
|
93
|
-
}
|
94
|
-
|
95
|
-
template<typename A>
|
96
|
-
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
|
97
|
-
sketch_builder.set_seed(seed);
|
98
|
-
return *this;
|
99
|
-
}
|
42
|
+
theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
|
100
43
|
|
101
44
|
template<typename A>
|
102
|
-
|
103
|
-
|
104
|
-
|
45
|
+
auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
|
46
|
+
return theta_union_alloc(
|
47
|
+
this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
|
48
|
+
this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
105
49
|
}
|
106
50
|
|
107
51
|
} /* namespace datasketches */
|
@@ -34,7 +34,7 @@ namespace datasketches {
|
|
34
34
|
template<
|
35
35
|
typename Entry,
|
36
36
|
typename ExtractKey,
|
37
|
-
typename Allocator
|
37
|
+
typename Allocator
|
38
38
|
>
|
39
39
|
struct theta_update_sketch_base {
|
40
40
|
using resize_factor = theta_constants::resize_factor;
|
@@ -53,6 +53,8 @@ struct theta_update_sketch_base {
|
|
53
53
|
inline uint64_t hash_and_screen(const void* data, size_t length);
|
54
54
|
|
55
55
|
inline std::pair<iterator, bool> find(uint64_t key) const;
|
56
|
+
static inline std::pair<iterator, bool> find(Entry* entries, uint8_t lg_size, uint64_t key);
|
57
|
+
|
56
58
|
|
57
59
|
template<typename FwdEntry>
|
58
60
|
inline void insert(iterator it, FwdEntry&& entry);
|
@@ -92,11 +94,14 @@ struct theta_update_sketch_base {
|
|
92
94
|
template<typename Derived, typename Allocator>
|
93
95
|
class theta_base_builder {
|
94
96
|
public:
|
97
|
+
// TODO: Redundant and deprecated. Will be removed in the next major version release.
|
95
98
|
using resize_factor = theta_constants::resize_factor;
|
96
99
|
static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
|
97
100
|
static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
|
98
|
-
|
99
|
-
|
101
|
+
// TODO: The following defaults are redundant and deprecated. Will be removed in the
|
102
|
+
// next major version release
|
103
|
+
static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
|
104
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
|
100
105
|
|
101
106
|
/**
|
102
107
|
* Creates an instance of the builder with default parameters.
|
@@ -147,7 +152,7 @@ protected:
|
|
147
152
|
static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
|
148
153
|
};
|
149
154
|
|
150
|
-
// key
|
155
|
+
// key extractor
|
151
156
|
|
152
157
|
struct trivial_extract_key {
|
153
158
|
template<typename T>
|
@@ -156,17 +161,7 @@ struct trivial_extract_key {
|
|
156
161
|
}
|
157
162
|
};
|
158
163
|
|
159
|
-
|
160
|
-
struct pair_extract_key {
|
161
|
-
K& operator()(std::pair<K, V>& entry) const {
|
162
|
-
return entry.first;
|
163
|
-
}
|
164
|
-
const K& operator()(const std::pair<K, V>& entry) const {
|
165
|
-
return entry.first;
|
166
|
-
}
|
167
|
-
};
|
168
|
-
|
169
|
-
// not zero
|
164
|
+
// key not zero
|
170
165
|
|
171
166
|
template<typename Entry, typename ExtractKey>
|
172
167
|
class key_not_zero {
|
@@ -195,12 +190,6 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
|
|
195
190
|
return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
|
196
191
|
}
|
197
192
|
|
198
|
-
static inline uint16_t compute_seed_hash(uint64_t seed) {
|
199
|
-
HashState hashes;
|
200
|
-
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
201
|
-
return hashes.h1;
|
202
|
-
}
|
203
|
-
|
204
193
|
// iterators
|
205
194
|
|
206
195
|
template<typename Entry, typename ExtractKey>
|
@@ -17,6 +17,9 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
+
#ifndef THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
|
21
|
+
#define THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
|
22
|
+
|
20
23
|
#include <iostream>
|
21
24
|
#include <sstream>
|
22
25
|
#include <algorithm>
|
@@ -36,7 +39,7 @@ seed_(seed),
|
|
36
39
|
entries_(nullptr)
|
37
40
|
{
|
38
41
|
if (lg_cur_size > 0) {
|
39
|
-
const size_t size =
|
42
|
+
const size_t size = 1ULL << lg_cur_size;
|
40
43
|
entries_ = allocator_.allocate(size);
|
41
44
|
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
42
45
|
}
|
@@ -55,7 +58,7 @@ seed_(other.seed_),
|
|
55
58
|
entries_(nullptr)
|
56
59
|
{
|
57
60
|
if (other.entries_ != nullptr) {
|
58
|
-
const size_t size =
|
61
|
+
const size_t size = 1ULL << lg_cur_size_;
|
59
62
|
entries_ = allocator_.allocate(size);
|
60
63
|
for (size_t i = 0; i < size; ++i) {
|
61
64
|
if (EK()(other.entries_[i]) != 0) {
|
@@ -69,7 +72,7 @@ entries_(nullptr)
|
|
69
72
|
|
70
73
|
template<typename EN, typename EK, typename A>
|
71
74
|
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
|
72
|
-
allocator_(other.allocator_),
|
75
|
+
allocator_(std::move(other.allocator_)),
|
73
76
|
is_empty_(other.is_empty_),
|
74
77
|
lg_cur_size_(other.lg_cur_size_),
|
75
78
|
lg_nom_size_(other.lg_nom_size_),
|
@@ -86,7 +89,7 @@ template<typename EN, typename EK, typename A>
|
|
86
89
|
theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
|
87
90
|
{
|
88
91
|
if (entries_ != nullptr) {
|
89
|
-
const size_t size =
|
92
|
+
const size_t size = 1ULL << lg_cur_size_;
|
90
93
|
for (size_t i = 0; i < size; ++i) {
|
91
94
|
if (EK()(entries_[i]) != 0) entries_[i].~EN();
|
92
95
|
}
|
@@ -133,18 +136,23 @@ uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data,
|
|
133
136
|
|
134
137
|
template<typename EN, typename EK, typename A>
|
135
138
|
auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
+
return find(entries_, lg_cur_size_, key);
|
140
|
+
}
|
141
|
+
|
142
|
+
template<typename EN, typename EK, typename A>
|
143
|
+
auto theta_update_sketch_base<EN, EK, A>::find(EN* entries, uint8_t lg_size, uint64_t key) -> std::pair<iterator, bool> {
|
144
|
+
const uint32_t size = 1 << lg_size;
|
145
|
+
const uint32_t mask = size - 1;
|
146
|
+
const uint32_t stride = get_stride(key, lg_size);
|
139
147
|
uint32_t index = static_cast<uint32_t>(key) & mask;
|
140
148
|
// search for duplicate or zero
|
141
149
|
const uint32_t loop_index = index;
|
142
150
|
do {
|
143
|
-
const uint64_t probe = EK()(
|
151
|
+
const uint64_t probe = EK()(entries[index]);
|
144
152
|
if (probe == 0) {
|
145
|
-
return std::pair<iterator, bool>(&
|
153
|
+
return std::pair<iterator, bool>(&entries[index], false);
|
146
154
|
} else if (probe == key) {
|
147
|
-
return std::pair<iterator, bool>(&
|
155
|
+
return std::pair<iterator, bool>(&entries[index], true);
|
148
156
|
}
|
149
157
|
index = (index + stride) & mask;
|
150
158
|
} while (index != loop_index);
|
@@ -172,13 +180,13 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
|
|
172
180
|
|
173
181
|
template<typename EN, typename EK, typename A>
|
174
182
|
auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
|
175
|
-
return &entries_[
|
183
|
+
return &entries_[1ULL << lg_cur_size_];
|
176
184
|
}
|
177
185
|
|
178
186
|
template<typename EN, typename EK, typename A>
|
179
187
|
uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
|
180
188
|
const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
|
181
|
-
return std::floor(fraction * (1 << lg_cur_size));
|
189
|
+
return static_cast<uint32_t>(std::floor(fraction * (1 << lg_cur_size)));
|
182
190
|
}
|
183
191
|
|
184
192
|
template<typename EN, typename EK, typename A>
|
@@ -189,29 +197,29 @@ uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t l
|
|
189
197
|
|
190
198
|
template<typename EN, typename EK, typename A>
|
191
199
|
void theta_update_sketch_base<EN, EK, A>::resize() {
|
192
|
-
const size_t old_size =
|
193
|
-
const uint8_t
|
194
|
-
const
|
195
|
-
|
196
|
-
|
197
|
-
EN* old_entries = entries_;
|
198
|
-
entries_ = allocator_.allocate(new_size);
|
199
|
-
for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
|
200
|
-
num_entries_ = 0;
|
200
|
+
const size_t old_size = 1ULL << lg_cur_size_;
|
201
|
+
const uint8_t lg_new_size = std::min<uint8_t>(lg_cur_size_ + static_cast<uint8_t>(rf_), lg_nom_size_ + 1);
|
202
|
+
const size_t new_size = 1ULL << lg_new_size;
|
203
|
+
EN* new_entries = allocator_.allocate(new_size);
|
204
|
+
for (size_t i = 0; i < new_size; ++i) EK()(new_entries[i]) = 0;
|
201
205
|
for (size_t i = 0; i < old_size; ++i) {
|
202
|
-
const uint64_t key = EK()(
|
206
|
+
const uint64_t key = EK()(entries_[i]);
|
203
207
|
if (key != 0) {
|
204
|
-
|
205
|
-
|
208
|
+
// always finds an empty slot in a larger table
|
209
|
+
new (find(new_entries, lg_new_size, key).first) EN(std::move(entries_[i]));
|
210
|
+
entries_[i].~EN();
|
211
|
+
EK()(entries_[i]) = 0;
|
206
212
|
}
|
207
213
|
}
|
208
|
-
|
214
|
+
std::swap(entries_, new_entries);
|
215
|
+
lg_cur_size_ = lg_new_size;
|
216
|
+
allocator_.deallocate(new_entries, old_size);
|
209
217
|
}
|
210
218
|
|
211
219
|
// assumes number of entries > nominal size
|
212
220
|
template<typename EN, typename EK, typename A>
|
213
221
|
void theta_update_sketch_base<EN, EK, A>::rebuild() {
|
214
|
-
const size_t size =
|
222
|
+
const size_t size = 1ULL << lg_cur_size_;
|
215
223
|
const uint32_t nominal_size = 1 << lg_nom_size_;
|
216
224
|
|
217
225
|
// empty entries have uninitialized payloads
|
@@ -224,10 +232,10 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
|
|
224
232
|
const size_t num_old_entries = num_entries_;
|
225
233
|
entries_ = allocator_.allocate(size);
|
226
234
|
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
227
|
-
num_entries_ =
|
235
|
+
num_entries_ = nominal_size;
|
228
236
|
// relies on consolidating non-empty entries to the front
|
229
237
|
for (size_t i = 0; i < nominal_size; ++i) {
|
230
|
-
|
238
|
+
new (find(EK()(old_entries[i])).first) EN(std::move(old_entries[i]));
|
231
239
|
old_entries[i].~EN();
|
232
240
|
}
|
233
241
|
for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
|
@@ -263,7 +271,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
|
|
263
271
|
|
264
272
|
template<typename Derived, typename Allocator>
|
265
273
|
theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
|
266
|
-
allocator_(allocator),
|
274
|
+
allocator_(allocator),
|
275
|
+
lg_k_(theta_constants::DEFAULT_LG_K),
|
276
|
+
rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
|
277
|
+
p_(1),
|
278
|
+
seed_(DEFAULT_SEED) {}
|
267
279
|
|
268
280
|
template<typename Derived, typename Allocator>
|
269
281
|
Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
|
@@ -298,7 +310,7 @@ Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
|
|
298
310
|
|
299
311
|
template<typename Derived, typename Allocator>
|
300
312
|
uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
|
301
|
-
if (p_ < 1) return theta_constants::MAX_THETA * p_;
|
313
|
+
if (p_ < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p_);
|
302
314
|
return theta_constants::MAX_THETA;
|
303
315
|
}
|
304
316
|
|
@@ -387,3 +399,5 @@ auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry&
|
|
387
399
|
}
|
388
400
|
|
389
401
|
} /* namespace datasketches */
|
402
|
+
|
403
|
+
#endif
|
@@ -37,7 +37,7 @@ TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
|
|
37
37
|
TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
|
38
38
|
update_theta_sketch a = update_theta_sketch::builder().build();
|
39
39
|
a.update(1);
|
40
|
-
update_theta_sketch b = update_theta_sketch::builder().set_p(0.
|
40
|
+
update_theta_sketch b = update_theta_sketch::builder().set_p(0.001f).build();
|
41
41
|
theta_a_not_b a_not_b;
|
42
42
|
|
43
43
|
// B is still empty
|
@@ -167,6 +167,28 @@ TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
|
|
167
167
|
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
168
168
|
}
|
169
169
|
|
170
|
+
TEST_CASE("theta a-not-b: estimation mode half overlap wrapped compact", "[theta_a_not_b]") {
|
171
|
+
update_theta_sketch a = update_theta_sketch::builder().build();
|
172
|
+
int value = 0;
|
173
|
+
for (int i = 0; i < 10000; i++) a.update(value++);
|
174
|
+
auto bytes_a = a.compact().serialize();
|
175
|
+
|
176
|
+
update_theta_sketch b = update_theta_sketch::builder().build();
|
177
|
+
value = 5000;
|
178
|
+
for (int i = 0; i < 10000; i++) b.update(value++);
|
179
|
+
auto bytes_b = b.compact().serialize();
|
180
|
+
|
181
|
+
theta_a_not_b a_not_b;
|
182
|
+
|
183
|
+
auto result = a_not_b.compute(
|
184
|
+
wrapped_compact_theta_sketch::wrap(bytes_a.data(), bytes_a.size()),
|
185
|
+
wrapped_compact_theta_sketch::wrap(bytes_b.data(), bytes_b.size())
|
186
|
+
);
|
187
|
+
REQUIRE_FALSE(result.is_empty());
|
188
|
+
REQUIRE(result.is_estimation_mode());
|
189
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
190
|
+
}
|
191
|
+
|
170
192
|
TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
|
171
193
|
update_theta_sketch a = update_theta_sketch::builder().build();
|
172
194
|
int value = 0;
|
@@ -48,7 +48,7 @@ TEST_CASE("theta intersection: empty", "[theta_intersection]") {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
|
51
|
-
update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.
|
51
|
+
update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
52
52
|
sketch.update(1);
|
53
53
|
theta_intersection intersection;
|
54
54
|
intersection.update(sketch);
|
@@ -174,6 +174,26 @@ TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_in
|
|
174
174
|
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
175
175
|
}
|
176
176
|
|
177
|
+
TEST_CASE("theta intersection: estimation mode half overlap ordered wrapped compact", "[theta_intersection]") {
|
178
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
179
|
+
int value = 0;
|
180
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
181
|
+
auto bytes1 = sketch1.compact().serialize();
|
182
|
+
|
183
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
184
|
+
value = 5000;
|
185
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
186
|
+
auto bytes2 = sketch2.compact().serialize();
|
187
|
+
|
188
|
+
theta_intersection intersection;
|
189
|
+
intersection.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
|
190
|
+
intersection.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
|
191
|
+
compact_theta_sketch result = intersection.get_result();
|
192
|
+
REQUIRE_FALSE(result.is_empty());
|
193
|
+
REQUIRE(result.is_estimation_mode());
|
194
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
195
|
+
}
|
196
|
+
|
177
197
|
TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
|
178
198
|
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
179
199
|
int value = 0;
|